def test_job_description_resources_schedulers():
    """Validate 'numCores' scheduler specifications in job descriptions.

    An 'exact' core count may not be combined with a scheduler, while
    min/max range requirements may.
    """
    # an exact core count together with a scheduler is contradictory
    with pytest.raises(IllegalResourceRequirements):
        descr = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": { "exact": 2, "scheduler": { "name": "split-into", "params": { "parts": 8  } } } } }"""
        Job(**json.loads(descr))

    # range-based core requirements accept a scheduler specification
    valid_descriptions = (
        """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "min": 2, "scheduler": { "name": "split-into", "params": { "parts": 8  } } } } }""",
        """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "max": 3, "scheduler": { "name": "split-into", "params": { "parts": 8  } } } } }""",
        """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "min": 2, "max": 3, "scheduler": { "name": "split-into", "params": { "parts": 8  } } } } }""",
        """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "min": 2, "scheduler": { "name": "maximum-iters" } } } }""",
    )
    for descr in valid_descriptions:
        assert Job(**json.loads(descr))
# 예제 #2 ("Example #2") — scrape artifact, commented out to keep the file valid Python
# 0
def test_local_error_duplicate_name_job_separate_reqs(tmpdir):
    """Duplicate job names sent in *separate* submit requests: the first job
    executes, the second is rejected because the name is already taken."""
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    # both requests use the same job name but different executables/sandboxes
    jobName = 'mdate'
    jobs1 = [
        job.to_dict() for job in [
            Job(
                jobName,
                JobExecution('date',
                             wd=abspath(tmpdir.join('date.sandbox')),
                             stdout='date.out',
                             stderr='date.err'),
                JobResources(numCores=ResourceSize(1)))
        ]
    ]
    jobs2 = [
        job.to_dict() for job in [
            Job(
                jobName,
                JobExecution('sleep',
                             wd=abspath(tmpdir.join('sleep.sandbox')),
                             stdout='sleep.out',
                             stderr='sleep.err'),
                JobResources(numCores=ResourceSize(1)))
        ]
    ]
    # two independent submit requests followed by a finish-after-done control
    reqs = [{
        'request': 'submit',
        'jobs': jobs1
    }, {
        'request': 'submit',
        'jobs': jobs2
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    # run the service in file mode on 2 (virtual) nodes, json report format
    sys.argv = [
        'QCG-PilotJob', '--file', '--file-path',
        str(file_path), '--nodes', '2', '--wd',
        str(tmpdir), '--report-format', 'json'
    ]
    QCGPMService().start()

    # the first job (date) should execute
    check_job_status_in_json([jobName],
                             workdir=str(tmpdir),
                             dest_state='SUCCEED')
    assert all((isdir(abspath(tmpdir.join('date.sandbox'))),
                exists(join(abspath(tmpdir.join('date.sandbox')), 'date.out')),
                exists(join(abspath(tmpdir.join('date.sandbox')),
                            'date.err'))))

    # the second job (sleep) due to the name clash should not execute
    assert not isdir(abspath(tmpdir.join('sleep.sandbox')))
def test_job_description_attributes():
    """Job attributes must survive a JSON round-trip and must be a dict."""
    # round-trip: dict attributes are preserved through to_json / Job(**...)
    attrs = {'j1_name': 'j1', 'j1_var1': 'var1'}
    original = Job(name='j1',
                   execution=JobExecution(exec='/bin/date'),
                   resources=JobResources(numCores=1),
                   attributes=attrs)
    assert all((len(original.attributes) == len(attrs),
                original.attributes == attrs))
    serialized = original.to_json()
    restored = Job(**json.loads(serialized))
    assert all((len(restored.attributes) == len(attrs),
                restored.attributes == attrs))
    assert original.to_dict() == restored.to_dict()

    # non-dict attributes (string, list) must be rejected
    for bad_attrs in ("some_illegal_attributes",
                      ["some_illegal_attributes", "more_illegal_attributes"]):
        with pytest.raises(IllegalJobDescription):
            Job(name='j1',
                execution=JobExecution(exec='/bin/date'),
                resources=JobResources(numCores=1),
                attributes=bad_attrs)
def test_job_description_serialization():
    """A job with node consumable resources (gpu) serializes consistently."""
    descr = """{ "name": "job1", 
              "execution": { "exec": "/bin/date", "stdin": "in_file", "stdout": "out_file", "stderr": "err_file",
                "modules": [ "python/3.6" ], "venv": [ "venv-3.6" ] },
              "resources": { "numCores": { "min": 2, "max": 4}, "numNodes": { "min": 1, "max": 2}, "nodeCrs": { "gpu": 1 } } }"""
    job = Job(**json.loads(descr))
    assert job, "Job with node consumable resources (gpu)"

    # to_dict and a to_json/json.loads round-trip must agree
    assert job.to_dict() == json.loads(job.to_json())
def test_joblist():
    """Exercise JobList add/get/remove operations and job-name parsing."""

    def _make_job(name):
        # minimal valid job used throughout this test
        return Job(name=name,
                   execution=JobExecution(exec='/bin/date'),
                   resources=JobResources(numCores=1))

    job_list = JobList()

    # jobs can be added and then retrieved by name
    job_list.add(_make_job('j1'))
    assert job_list.exist('j1')
    assert job_list.get('j1').get_name() == 'j1'

    job_list.add(_make_job('j2'))
    assert job_list.exist('j1') and job_list.exist('j2')
    assert job_list.get('j2').get_name() == 'j2'

    # adding a job under an already registered name must fail
    for duplicate in ('j2', 'j1'):
        with pytest.raises(JobAlreadyExist):
            job_list.add(_make_job(duplicate))

    names = job_list.jobs()
    assert len(names) == 2 and 'j1' in names and 'j2' in names

    # removing a job makes its name unknown again
    job_list.remove('j2')
    assert not job_list.exist('j2')
    assert job_list.get('j2') is None

    names = job_list.jobs()
    assert len(names) == 1 and 'j1' in names

    # only Job instances may be stored in the list
    with pytest.raises(Exception):
        job_list.add('another job')

    # iteration job names parse into a (base name, iteration index) pair
    assert JobList.parse_jobname('j1') == ('j1', None)
    assert JobList.parse_jobname('j1:1') == ('j1', 1)
    # malformed iteration suffixes raise ValueError
    with pytest.raises(ValueError):
        JobList.parse_jobname('j1:2:1')
    with pytest.raises(ValueError):
        JobList.parse_jobname('j1:')
def test_jobdescription_jobname():
    """Job-name validation: the name must be present and must not contain
    the iteration separator ':'."""
    # a simple valid name is accepted
    assert Job(name='j1',
               execution=JobExecution(exec='/bin/date'),
               resources=JobResources(numCores=1))

    # a missing name or a name containing ':' must be rejected
    for bad_name in (None, 'j1:1'):
        with pytest.raises(IllegalJobDescription):
            Job(name=bad_name,
                execution=JobExecution(exec='/bin/date'),
                resources=JobResources(numCores=1))
# 예제 #7 ("Example #7") — scrape artifact, commented out to keep the file valid Python
# 0
def test_local_error_duplicate_name_job(tmpdir):
    """Duplicate job names inside a *single* submit request: the whole request
    must be rejected, so neither job is executed."""
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    # two jobs sharing one name within the same submit request
    jobName = 'mdate'
    jobs = [
        job.to_dict() for job in [
            Job(
                jobName,
                JobExecution('date',
                             wd=abspath(tmpdir.join('date.sandbox')),
                             stdout='date.out',
                             stderr='date.err'),
                JobResources(numCores=ResourceSize(1))),
            Job(
                jobName,
                JobExecution('sleep',
                             wd=abspath(tmpdir.join('sleep.sandbox')),
                             stdout='sleep.out',
                             stderr='sleep.err'),
                JobResources(numCores=ResourceSize(1)))
        ]
    ]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    # run the service in file mode on 2 (virtual) nodes, json report format
    sys.argv = [
        'QCG-PilotJob', '--file', '--file-path',
        str(file_path), '--nodes', '2', '--wd',
        str(tmpdir), '--report-format', 'json'
    ]
    QCGPMService().start()

    # no job should be executed due to the failed submit request with non-unique jobs inside
    assert not isdir(abspath(tmpdir.join('date.sandbox')))
    assert not isdir(abspath(tmpdir.join('sleep.sandbox')))
# 예제 #8 ("Example #8") — scrape artifact, commented out to keep the file valid Python
# 0
def test_local_error_job_desc():
    """Each malformed job description must raise IllegalJobDescription.

    Construction is deferred via lambdas so that the exception — whether it
    comes from JobExecution, JobResources or Job itself — is raised inside
    the pytest.raises context, as in the original per-case blocks.
    """
    failing_constructors = (
        # missing job executable
        lambda: Job('error_job',
                    JobExecution(None, stdout='date.out', stderr='date.err'),
                    JobResources(numCores=ResourceSize(1))),
        # arguments given as a plain string instead of a list
        lambda: Job('error_job',
                    JobExecution('date',
                                 args='this should be a list',
                                 stdout='date.out',
                                 stderr='date.err'),
                    JobResources(numCores=ResourceSize(1))),
        # environment given as a list instead of a dict
        lambda: Job('error_job',
                    JobExecution('date',
                                 args=['arg1'],
                                 env=['this shuld be a dict'],
                                 stdout='date.out',
                                 stderr='date.err'),
                    JobResources(numCores=ResourceSize(1))),
        # missing execution definition
        lambda: Job('error_job', None, JobResources(numCores=ResourceSize(1))),
        # missing resources definition
        lambda: Job('error_job',
                    JobExecution('date',
                                 args=['arg1'],
                                 env=['this shuld be a dict'],
                                 stdout='date.out',
                                 stderr='date.err'), None),
    )
    for construct in failing_constructors:
        with pytest.raises(IllegalJobDescription):
            construct()
# 예제 #9 ("Example #9") — scrape artifact, commented out to keep the file valid Python
# 0
def test_local_simple_script_job(tmpdir):
    """A job defined via a multi-line 'script' (instead of 'exec') runs to
    SUCCEED, producing non-empty stdout and an empty stderr file."""
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    jobName = 'mdate_script'
    jobs = [
        job.to_dict() for job in [
            Job(
                jobName,
                JobExecution(script='/bin/date\n/bin/hostname\n',
                             wd=abspath(tmpdir.join('date.sandbox')),
                             stdout='date.out',
                             stderr='date.err'),
                JobResources(numCores=ResourceSize(1)))
        ]
    ]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    # run the service in file mode with debug logging on 2 (virtual) nodes
    sys.argv = [
        'QCG-PilotJob', '--log', 'debug', '--file', '--file-path',
        str(file_path), '--nodes', '2', '--wd',
        str(tmpdir), '--report-format', 'json'
    ]
    QCGPMService().start()

    # the job must succeed: sandbox exists, stdout non-empty, stderr empty
    check_job_status_in_json([jobName],
                             workdir=str(tmpdir),
                             dest_state='SUCCEED')
    assert all(
        (isdir(abspath(tmpdir.join('date.sandbox'))),
         exists(join(abspath(tmpdir.join('date.sandbox')), 'date.out')),
         exists(join(abspath(tmpdir.join('date.sandbox')), 'date.err')),
         stat(join(abspath(tmpdir.join('date.sandbox')), 'date.out')).st_size >
         0, stat(join(abspath(tmpdir.join('date.sandbox')),
                      'date.err')).st_size == 0))

    # querying the status of an unknown job name must raise ValueError
    with pytest.raises(ValueError):
        check_job_status_in_json([jobName + 'xxx'],
                                 workdir=str(tmpdir),
                                 dest_state='SUCCEED')
# 예제 #10 ("Example #10") — scrape artifact, commented out to keep the file valid Python
# 0
def test_local_workflows_error(tmpdir):
    """A job depending on a non-existing job must never start: its sandbox
    directory is not created."""
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    # single job with an 'after' dependency on a job that is never submitted
    jobs = [
        job.to_dict() for job in [
            Job('first',
                JobExecution('sleep',
                             args=['2s'],
                             wd=abspath(tmpdir.join('first.sandbox')),
                             stdout='out',
                             stderr='err'),
                JobResources(numCores=ResourceSize(1)),
                dependencies=JobDependencies(after=['not-existing']))
        ]
    ]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    # run the service in file mode on 2 (virtual) nodes, json report format
    sys.argv = [
        'QCG-PilotJob', '--file', '--file-path',
        str(file_path), '--nodes', '2', '--wd',
        str(tmpdir), '--report-format', 'json'
    ]
    QCGPMService().start()

    # the job must not have started, so its sandbox should not exist
    assert not exists(abspath(tmpdir.join('first.sandbox')))

    rmtree(str(tmpdir))
# 예제 #11 ("Example #11") — scrape artifact, commented out to keep the file valid Python
# 0
def test_local_workflows(tmpdir):
    """Three-job workflow with 'after' dependencies: second runs after first,
    third after both; verified both via final states and via the start/finish
    timestamps in the jobs.report file."""
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    # first (no deps) -> second (after first) -> third (after first, second)
    jobs = [
        job.to_dict() for job in [
            Job(
                'first',
                JobExecution('sleep',
                             args=['2s'],
                             wd=abspath(tmpdir.join('first.sandbox')),
                             stdout='out',
                             stderr='err'),
                JobResources(numCores=ResourceSize(1))),
            Job('second',
                JobExecution('sleep',
                             args=['1s'],
                             wd=abspath(tmpdir.join('second.sandbox')),
                             stdout='out',
                             stderr='err'),
                JobResources(numCores=ResourceSize(1)),
                dependencies=JobDependencies(after=['first'])),
            Job('third',
                JobExecution('date',
                             wd=abspath(tmpdir.join('third.sandbox')),
                             stdout='out',
                             stderr='err'),
                JobResources(numCores=ResourceSize(1)),
                dependencies=JobDependencies(after=['first', 'second']))
        ]
    ]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    # the ammount of resources should be enough to theoretically start all three job's at once
    sys.argv = [
        'QCG-PilotJob', '--file', '--file-path',
        str(file_path), '--nodes', '4', '--wd',
        str(tmpdir), '--report-format', 'json'
    ]
    QCGPMService().start()

    # all three jobs must succeed and leave their sandbox with out/err files
    jnames = ['first', 'second', 'third']
    check_job_status_in_json(jnames, workdir=str(tmpdir), dest_state='SUCCEED')
    for jname in jnames:
        assert all((isdir(abspath(tmpdir.join('{}.sandbox'.format(jname)))),
                    exists(
                        join(abspath(tmpdir.join('{}.sandbox'.format(jname))),
                             'out')),
                    exists(
                        join(abspath(tmpdir.join('{}.sandbox'.format(jname))),
                             'err'))))

    # one json entry per job is written to the jobs.report file
    with open(join(find_single_aux_dir(str(tmpdir)), 'jobs.report'), 'r') as f:
        job_stats = [json.loads(line) for line in f.readlines()]

    assert len(job_stats) == len(jnames)

    # collect runtime, start (EXECUTING) and finish (SUCCEED) times per job
    jstats = {}
    for i in range(0, len(jnames)):
        job = job_stats[i]

        print('readed job stats: {}'.format(str(job)))
        t = datetime.strptime(job['runtime']['rtime'], "%H:%M:%S.%f")
        rtime = timedelta(hours=t.hour,
                          minutes=t.minute,
                          seconds=t.second,
                          microseconds=t.microsecond)

        # find start executing time
        exec_state = list(
            filter(lambda st_en: st_en['state'] == 'EXECUTING',
                   job['history']))
        assert len(exec_state) == 1

        # find finish executing time
        finish_state = list(
            filter(lambda st_en: st_en['state'] == 'SUCCEED', job['history']))
        assert len(finish_state) == 1

        start_time = datetime.strptime(exec_state[0]['date'],
                                       '%Y-%m-%dT%H:%M:%S.%f')
        finish_time = datetime.strptime(finish_state[0]['date'],
                                        '%Y-%m-%dT%H:%M:%S.%f')

        jstats[job['name']] = {
            'r_time': rtime,
            's_time': start_time,
            'f_time': finish_time
        }

    # assert second job started after the first one
    assert jstats['second']['s_time'] > jstats['first']['f_time']

    # assert third job started after the first and second ones
    assert all((jstats['third']['s_time'] > jstats['first']['f_time'],
                jstats['third']['s_time'] > jstats['second']['f_time']))

    rmtree(str(tmpdir))
def test_job_description_simple():
    """Basic validation of job descriptions parsed from JSON."""

    # descriptions lacking a mandatory element: name, execution, resources
    for incomplete in ('{ }',
                       '{ "name": "job1" }',
                       '{ "name": "job1", "execution": { "exec": "/bin/date" } }'):
        with pytest.raises(Exception):
            Job(**json.loads(incomplete))

    # minimal valid descriptions: one with 'exec', one with 'script'
    for minimal in ("""{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2  } }""",
                    """{ "name": "job1", 
              "execution": { "script": "#!/bin/bash\\n/bin/date\\n" },
              "resources": { "numCores": 2  } }"""):
        assert Job(**json.loads(minimal)), "Simple job with minimal resource requirements"

    # illegal names and execution-element combinations
    illegal = (
        # ':' is reserved as the iteration separator in job names
        """{ "name": ":job1", 
                  "execution": { "script": "#!/bin/bash\\n/bin/date\\n" },
                  "resources": { "numCores": 2  } }""",
        # neither 'script' nor 'exec' given
        '{ "name": "job1", "execution": {  }, "resources": { "numCores": 2 } }',
        # 'script' and 'exec' are mutually exclusive
        '{ "name": "job1", "execution": { "exec": "/bin/date", "script": "#!/bin/bash\\n/bin/date\\n" }, "resources": { "numCores": 2 } }',
        # 'script' excludes 'args'
        '{ "name": "job1", "execution": { "script": "#!/bin/bash\\n/bin/date\\n", "args": [ "1", "2" ] }, "resources": { "numCores": 2 } }',
        # 'script' excludes 'env'
        '{ "name": "job1", "execution": { "script": "#!/bin/bash\\n/bin/date\\n", "env": { "var1": "1" } }, "resources": { "numCores": 2 } }',
    )
    for descr in illegal:
        with pytest.raises(IllegalJobDescription):
            Job(**json.loads(descr))

    # modules given as a list are accepted
    job = Job(**json.loads('{ "name": "job1", "execution": { "exec": "/bin/date", "modules": [ "python/3.6" ] }, "resources": { "numCores": 2 } }'))
    assert job, "Simple job with single module"

    # modules given as a single string are accepted as well
    job = Job(**json.loads('{ "name": "job1", "execution": { "exec": "/bin/date", "modules": "python/3.6" }, "resources": { "numCores": 2 } }'))
    assert job, "Simple job with module as string"

    # 'args' must be a list and 'env' must be a dict
    for descr in (
            '{ "name": "job1", "execution": { "exec": "/bin/date", "args": "illegal_argument_format" }, "resources": { "numCores": 2 } }',
            '{ "name": "job1", "execution": { "exec": "/bin/date", "env": "illegal_environment_list" }, "resources": { "numCores": 2 } }'):
        with pytest.raises(IllegalJobDescription):
            Job(**json.loads(descr))
def test_job_description_subjobs():
    """Iteration (sub)job behavior: per-iteration names, states, runtimes,
    messages, and how iteration outcomes aggregate into the whole-job state
    (any FAILED/CANCELED/OMITTED iteration fails the whole job)."""
    # iteration job
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations

    # iteration names: '<base>:<iteration index>'
    assert j.get_name() == 'j1'
    assert all(
        j.get_name(it) == '{}:{}'.format(j.get_name(), it) for it in range(10))

    # iteration states (initial)
    assert j.state() == JobState.QUEUED
    assert all(j.state(it) == JobState.QUEUED for it in range(10))
    assert all(j.str_state(it) == JobState.QUEUED.name for it in range(10))

    # iteration runtimes are stored per iteration
    for it in range(10):
        j.append_runtime({'host': 'local.{}'.format(it)}, it)
    assert all(
        j.runtime(it).get('host') == 'local.{}'.format(it) for it in range(10))

    # whole job success: every iteration SUCCEED -> job SUCCEED
    for it in range(10):
        j.set_state(JobState.SUCCEED, it, 'job {} succeed'.format(it))
    assert j.state() == JobState.SUCCEED
    assert all(
        j.messages(it) == 'job {} succeed'.format(it) for it in range(10))

    # whole job fail (one of the iterations failed)
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations
    for it in range(9):
        j.set_state(JobState.SUCCEED, it, 'job {} succeed'.format(it))
    j.set_state(JobState.FAILED, 9, 'job 9 failed')
    assert j.state() == JobState.FAILED
    assert all(
        j.messages(it) == 'job {} succeed'.format(it) for it in range(9))
    assert j.messages(9) == 'job 9 failed'

    # whole job fail (one of the iterations canceled)
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations
    for it in range(9):
        j.set_state(JobState.SUCCEED, it, 'job {} succeed'.format(it))
    j.set_state(JobState.CANCELED, 9, 'job 9 canceled')
    assert j.state() == JobState.FAILED
    assert all(
        j.messages(it) == 'job {} succeed'.format(it) for it in range(9))
    assert j.messages(9) == 'job 9 canceled'

    # whole job fail (one of the iterations omitted)
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations
    for it in range(9):
        j.set_state(JobState.SUCCEED, it, 'job {} succeed'.format(it))
    j.set_state(JobState.OMITTED, 9, 'job 9 omitted')
    assert j.state() == JobState.FAILED
    assert all(
        j.messages(it) == 'job {} succeed'.format(it) for it in range(9))
    assert j.messages(9) == 'job 9 omitted'

    # not all iterations finished -> whole job remains QUEUED
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations
    for it in range(9):
        j.set_state(JobState.SUCCEED, it, 'job {} succeed'.format(it))
    assert j.state() == JobState.QUEUED
    assert all(
        j.messages(it) == 'job {} succeed'.format(it) for it in range(9))

    # whole job fail: 9 iterations FAILED, last CANCELED
    # (NOTE(review): the last message text says 'succeed' — likely a typo in
    # the test data, but the assertions below match it)
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations
    for it in range(9):
        j.set_state(JobState.FAILED, it, 'job {} failed'.format(it))
    j.set_state(JobState.CANCELED, 9, 'job 9 succeed')
    assert j.state() == JobState.FAILED
    assert all(j.messages(it) == 'job {} failed'.format(it) for it in range(9))
    assert j.messages(9) == 'job 9 succeed'

    # many messages per iteration are joined with newlines
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations
    for it in range(10):
        j.set_state(JobState.EXECUTING, it, 'job {} executing'.format(it))
    assert j.state() == JobState.QUEUED
    assert all(j.state(it) == JobState.EXECUTING for it in range(10))
    assert all(j.str_state(it) == JobState.EXECUTING.name for it in range(10))
    for it in range(10):
        j.set_state(JobState.SUCCEED, it, 'job {} finished'.format(it))
    assert j.state() == JobState.SUCCEED
    assert all(j.state(it) == JobState.SUCCEED for it in range(10))
    assert all(j.str_state(it) == JobState.SUCCEED.name for it in range(10))
    assert all(
        j.messages(it) == 'job {it} executing\njob {it} finished'.format(it=it)
        for it in range(10))

    # messages and state set on the whole job (iteration=None)
    j = Job(name='j1',
            execution=JobExecution(exec='/bin/date'),
            resources=JobResources(numCores=1),
            iteration=JobIteration(stop=10))
    assert j.has_iterations
    j.set_state(JobState.EXECUTING, iteration=None, err_msg='job executing')
    assert all((j.state() == JobState.EXECUTING,
                j.str_state() == JobState.EXECUTING.name))
    assert j.messages() == 'job executing'
    for it in range(10):
        j.set_state(JobState.EXECUTING, it, 'job {} executing'.format(it))
    j.set_state(JobState.FAILED, iteration=None, err_msg='job failed')
    assert all(
        (j.state() == JobState.FAILED, j.str_state() == JobState.FAILED.name))
    assert j.messages() == 'job executing\njob failed'
    # failed job will not change state if once set
    for it in range(10):
        j.set_state(JobState.SUCCEED, it, 'job {} finished'.format(it))
    assert all(
        (j.state() == JobState.FAILED, j.str_state() == JobState.FAILED.name))
    assert all(j.str_state(it) == JobState.SUCCEED.name for it in range(10))
    assert all(
        j.messages(it) == 'job {it} executing\njob {it} finished'.format(it=it)
        for it in range(10))
def test_job_description_iterations():
    """Validate the 'iteration' element of a job description and the
    JobIteration serialization helpers.

    Fixes: the JobIteration round-trip comparison was missing its 'assert'
    (a no-op expression statement), and the final assert message wrongly
    said "dependencies" instead of "iterations".
    """
    # no iterations
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert not job.has_iterations

    # valid iterations: explicit start/stop
    jobd = """{ "name": "job1", 
              "iteration": { "start": 0, "stop": 10 },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert all((job.has_iterations, job.iteration.iterations() == 10))
    assert all(job.iteration.in_range(i) for i in range(10))
    assert all(not job.iteration.in_range(i) for i in range(10, 20))

    # valid iterations with default start (0)
    jobd = """{ "name": "job1", 
              "iteration": { "stop": 10 },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert all((job.has_iterations, job.iteration.iterations() == 10))
    assert all(job.iteration.in_range(i) for i in range(10))
    assert all(not job.iteration.in_range(i) for i in range(10, 20))

    # valid iterations: non-zero start
    jobd = """{ "name": "job1", 
              "iteration": { "start": 5, "stop": 10 },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert all((job.has_iterations, job.iteration.iterations() == 5))
    assert all(job.iteration.in_range(i) for i in range(5, 10))
    assert all(not job.iteration.in_range(i) for i in range(0, 5))

    # not valid iteration type (must be an object, not a list)
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "iteration": [ 5, 10 ],
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # not valid iteration spec (empty object)
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "iteration": {  },
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # not valid iteration spec (unknown 'iterations' key)
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "iteration": { "iterations": 5 },
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # not valid iteration spec (stop before start, with step)
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "iteration": { "start": 5, "stop": 0, "step": 2 },
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # wrong iteration range (stop < start)
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "iteration": { "start": 5, "stop": 0 },
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # wrong iteration range (empty: stop == start)
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "iteration": { "start": 5, "stop": 5 },
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # json serialization of JobIteration round-trips
    jit = JobIteration(start=0, stop=10)
    jit_json = jit.to_json()

    jit_clone = JobIteration(**json.loads(jit_json))
    # bug fix: this comparison previously lacked 'assert' and was a no-op
    assert jit.to_dict() == jit_clone.to_dict()

    # string serialization
    assert str(jit) == '{}-{}'.format(0, 10)

    # whole-job serialization round-trip with iterations
    jobd = """{ "name": "job1", 
              "iteration": { "start": 5, "stop": 10 },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert job, "Job with iterations"
    job_dict = job.to_dict()

    job_clone = json.loads(job.to_json())
    assert job_dict == job_clone
def test_job_description_resources():
    """Validate ResourceSize and JobResources: construction, validation errors
    and JSON (de)serialization round-trips."""
    # a resource size

    # range
    rs = ResourceSize(min=4, max=5)
    assert all((rs.exact is None, rs.scheduler is None, rs.min == 4,
                rs.max == 5, not rs.is_exact()))
    assert rs.range == (4, 5)

    # exact
    rs = ResourceSize(exact=4)
    assert all(
        (rs.exact == 4, rs.scheduler is None, rs.min is None, rs.max is None,
         rs.is_exact()))
    assert rs.range == (None, None)

    # range with scheduler (just for tests)
    rs = ResourceSize(min=4, max=5, scheduler="sched1")
    assert all((rs.exact is None, rs.scheduler == "sched1", rs.min == 4,
                rs.max == 5, not rs.is_exact()))
    assert rs.range == (4, 5)

    # exact with scheduler
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(exact=4, scheduler="sched1")

    # no data
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize()

    # no required data
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(scheduler="shed1")

    # range and exact
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(exact=4, min=2)

    # range and exact
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(exact=4, max=2)

    # illegal exact
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(exact=-1)

    # illegal range
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(max=-2)
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(min=-2)
    with pytest.raises(IllegalResourceRequirements):
        ResourceSize(min=4, max=2)

    # serialization with range
    rs = ResourceSize(min=4, max=5, scheduler="sched1")
    assert all((rs.exact is None, rs.scheduler == "sched1", rs.min == 4,
                rs.max == 5, not rs.is_exact()))
    assert rs.range == (4, 5)
    rs_json = rs.to_json()

    rs_clone = ResourceSize(**json.loads(rs_json))
    assert all((rs_clone.exact is None, rs_clone.scheduler == "sched1",
                rs_clone.min == 4, rs_clone.max == 5, not rs_clone.is_exact()))
    assert rs_clone.range == (4, 5)
    # compare the original with its clone (previously the clone was compared
    # with itself and the result was discarded - no assertion happened)
    assert rs.to_dict() == rs_clone.to_dict()

    # serialization with exact
    rs = ResourceSize(exact=2)
    assert all(
        (rs.exact == 2, rs.scheduler is None, rs.min is None, rs.max is None,
         rs.is_exact()))
    assert rs.range == (None, None)
    rs_json = rs.to_json()

    rs_clone = ResourceSize(**json.loads(rs_json))
    assert all(
        (rs_clone.exact == 2, rs_clone.scheduler is None, rs_clone.min is None,
         rs_clone.max is None, rs_clone.is_exact()))
    assert rs_clone.range == (None, None)
    # compare the original with its clone (previously unasserted self-comparison)
    assert rs.to_dict() == rs_clone.to_dict()

    # number of cores as a number
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2  } }"""
    job = Job(**json.loads(jobd))
    assert job, "Simple job with integer number of cores"

    # number of cores as an exact object
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "exact": 2 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Simple job with number of cores as an exact object"

    # number of cores as a range object
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "min": 2, "max": 3 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Simple job with number of cores as a range object"

    # number of cores as a range object, with one of the boundary
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "min": 2 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Simple job with number of cores as a range object with only min boundary"

    # number of cores as a range object, with one of the boundary
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": { "max": 2 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Simple job with number of cores as a range object with only max boundary"

    # empty resources element
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { } }"""
        Job(**json.loads(jobd))

    # no cores specification
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": { } } }"""
        Job(**json.loads(jobd))

    # no nodes specification
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numNodes": { } } }"""
        Job(**json.loads(jobd))

    # illegal type of resources specification
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": [ "numNodes" ] }"""
        Job(**json.loads(jobd))

    # illegal type of cores specification
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": [ 1 ] } }"""
        Job(**json.loads(jobd))

    # illegal type of nodes specification
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numNodes": [ 1 ] } }"""
        Job(**json.loads(jobd))

    # exact number with range
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": { "exact": 2, "min": 1, "max": 3 } } }"""
        Job(**json.loads(jobd))

    # exact number with one of the range boundary
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": { "exact": 2, "min": 1 } } }"""
        Job(**json.loads(jobd))

    # number of cores negative
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": -2 } }"""
        Job(**json.loads(jobd))

    # 'max' greater than 'min' in range object
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": { "min": 4, "max": 3 } } }"""
        Job(**json.loads(jobd))

    # range boundary negative
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": { "min": -2 } } }"""
        Job(**json.loads(jobd))

    # range boundary negative
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": { "min": -4, "max": -1 } } }"""
        Job(**json.loads(jobd))

    # general crs
    assert not JobResources(numCores=1).has_crs
    jr = JobResources(numCores=1, nodeCrs={'gpu': 1})
    assert all((jr.has_crs, len(jr.crs) == 1, CRType.GPU
                in jr.crs, jr.crs[CRType.GPU] == 1))

    # crs without cores count
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "nodeCrs": { "gpu": 1 } } }"""
        Job(**json.loads(jobd))

    # gpu cr
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "nodeCrs": { "gpu": 1 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Job with node consumable resources (gpu)"

    # mem cr
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "nodeCrs": { "mem": 1 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Job with node consumable resources (mem)"

    # gpu cr with non-uniform letter case
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "nodeCrs": { "GpU": 1 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Job with node consumable resources (gpu)"

    # mem cr with non-uniform letter case
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "nodeCrs": { "meM": 1 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Job with node consumable resources (mem)"

    # many crs with non-uniform letter case
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "nodeCrs": { "meM": 1, "gPU": 2 } } }"""
    job = Job(**json.loads(jobd))
    assert job, "Job with node consumable resources (mem)"

    # unknown cr
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": { "strange_cr": 1 } } }"""
        Job(**json.loads(jobd))

    # gpu cr without integer value
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": { "gpu": "1" } } }"""
        Job(**json.loads(jobd))

    # mem cr without integer value
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": { "mem": "one" } } }"""
        Job(**json.loads(jobd))

    # gpu cr with negative integer value
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": { "gpu": -1 } } }"""
        Job(**json.loads(jobd))

    # gpu cr with 0
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": { "gpu": 0 } } }"""
        Job(**json.loads(jobd))

    # repeating gpu cr
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": { "gpu": 1, "GpU": 2 } } }"""
        Job(**json.loads(jobd))

    # repeating gpu cr
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": { "gpu": 1, "mem": 2, "GpU": 2 } } }"""
        Job(**json.loads(jobd))

    # wrong format of cr's
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2, "nodeCrs": [  ] } }"""
        Job(**json.loads(jobd))

    # num cores & nodes as integers
    jr = JobResources(numCores=2)
    assert all((jr.cores.is_exact(), jr.cores.exact == 2))
    jr = JobResources(numNodes=3)
    assert all((jr.nodes.is_exact(), jr.nodes.exact == 3))

    # min number of cores
    assert JobResources(numCores=ResourceSize(
        exact=2)).get_min_num_cores() == 2
    assert JobResources(numCores=ResourceSize(min=4)).get_min_num_cores() == 4
    assert JobResources(
        numCores=ResourceSize(min=3, max=6)).get_min_num_cores() == 3
    assert JobResources(
        numCores=ResourceSize(exact=2),
        numNodes=ResourceSize(exact=1)).get_min_num_cores() == 2
    assert JobResources(numCores=ResourceSize(exact=2),
                        numNodes=ResourceSize(min=4)).get_min_num_cores() == 8
    assert JobResources(numCores=ResourceSize(exact=2),
                        numNodes=ResourceSize(min=4,
                                              max=5)).get_min_num_cores() == 8
    assert JobResources(
        numCores=ResourceSize(min=3, max=6),
        numNodes=ResourceSize(exact=2)).get_min_num_cores() == 6
    assert JobResources(numCores=ResourceSize(min=3, max=6),
                        numNodes=ResourceSize(min=4,
                                              max=6)).get_min_num_cores() == 12

    # walltime in job resources
    jr = JobResources(numCores=ResourceSize(exact=2), wt="10m")
    assert jr.wt == timedelta(minutes=10)

    # errors
    with pytest.raises(IllegalResourceRequirements):
        JobResources(numCores=ResourceSize(exact=2), wt="")
    with pytest.raises(IllegalResourceRequirements):
        JobResources(numCores=ResourceSize(exact=2), wt="0")
    with pytest.raises(IllegalResourceRequirements):
        JobResources(numCores=ResourceSize(exact=2), wt="2")
    with pytest.raises(IllegalResourceRequirements):
        JobResources(numCores=ResourceSize(exact=2), wt="2d")
    with pytest.raises(IllegalResourceRequirements):
        JobResources(numCores=ResourceSize(exact=2), wt="two hours")
    with pytest.raises(IllegalResourceRequirements):
        JobResources(numCores=ResourceSize(exact=2), wt="-10")
    with pytest.raises(IllegalResourceRequirements):
        JobResources(numCores=ResourceSize(exact=2), wt="-10s")

    # job resources serialization
    jr = JobResources(numCores=ResourceSize(min=2, max=6, scheduler="sched1"),
                      numNodes=ResourceSize(exact=4),
                      wt="10m",
                      nodeCrs={"gpu": 2})
    jr_json = jr.to_json()

    jr_clone = JobResources(**json.loads(jr_json))
    # previously the comparison result was computed but never asserted
    assert jr.to_dict() == jr_clone.to_dict()

    # walltime
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "wt": "10m" } }"""
    job = Job(**json.loads(jobd))
    assert job.resources.wt == timedelta(minutes=10)

    # walltime 2
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "wt": "24h" } }"""
    job = Job(**json.loads(jobd))
    assert job.resources.wt == timedelta(hours=24)

    # walltime 3
    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "wt": "24h10m5s" } }"""
    job = Job(**json.loads(jobd))
    assert job.resources.wt == timedelta(hours=24, minutes=10, seconds=5)

    # missing walltime value
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "wt": "" } }"""
        job = Job(**json.loads(jobd))
        print('job walltime: {}'.format(str(job.resources.wt)))

    # wrong walltime format walltime
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "wt": "2d" } }"""
        Job(**json.loads(jobd))

    # wrong walltime format walltime 2
    with pytest.raises(IllegalResourceRequirements):
        jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2, "wt": "2" } }"""
        Job(**json.loads(jobd))
def test_job_description_dependencies():
    """Validate job 'dependencies' parsing, error handling and (de)serialization."""
    # simple dependencies
    jobd = """{ "name": "job1", 
              "dependencies": { "after": [ "job2" ] },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert all((len(job.dependencies.after) == 1, 'job2'
                in job.dependencies.after))

    # valid dependencies
    jobd = """{ "name": "job1", 
              "dependencies": { "after": [ "job2", "job3" ] },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert all((len(job.dependencies.after) == 2, 'job2'
                in job.dependencies.after, 'job3' in job.dependencies.after))

    jobd = """{ "name": "job1", 
              "dependencies": { "after": [ "job2", "job3" ] },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert job.has_dependencies

    jobd = """{ "name": "job1", 
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert not job.has_dependencies

    jobd = """{ "name": "job1", 
              "dependencies": { "after": [  ] },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert not job.has_dependencies

    # dependencies not as list
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
                  "dependencies": { "after": "job2" },
                  "execution": { "exec": "/bin/date" },
                  "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # wrong keyword in job dependencies
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
              "dependencies": { "whenever": "job2" },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # wrong type of job dependencies
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
              "dependencies": { "after": { "job": "job2" } },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # wrong elements of job dependencies
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
              "dependencies": { "after": [ "job2", [ "job3", "job4" ] ] },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # wrong type of job dependencies
    with pytest.raises(IllegalJobDescription):
        jobd = """{ "name": "job1", 
              "dependencies": [ { "after": [ "job2", [ "job3", "job4" ] ] } ],
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
        Job(**json.loads(jobd))

    # json serialization
    jdep = JobDependencies(after=["job2", "job3"])
    jdep_json = jdep.to_json()
    assert all((jdep.has_dependencies, len(jdep.after) == 2, 'job2'
                in jdep.after, 'job3' in jdep.after))

    print("job dependencies as json: {}".format(jdep_json))
    jdep_clone = JobDependencies(**json.loads(jdep_json))
    # previously the comparison result was computed but never asserted
    assert jdep.to_dict() == jdep_clone.to_dict()

    # serialization
    jobd = """{ "name": "job1", 
              "dependencies": { "after": [ "job2", "job3" ] },
              "execution": { "exec": "/bin/date" },
              "resources": { "numCores": 2 } }"""
    job = Job(**json.loads(jobd))
    assert job, "Job with dependencies"
    job_dict = job.to_dict()

    job_clone = json.loads(job.to_json())
    assert job_dict == job_clone
예제 #17
0
    def resume(path, manager, progress=False):
        """Resume interrupted task iterations execution.

        Reads the tracker files left by a previous (interrupted) run, re-creates
        the submitted jobs, replays their recorded states and registers every job
        in the manager's job list.

        Arguments:
            * path (str) - path to the directory where tracker files of interrupted execution has been saved
            * manager (Manager) - a manager class that operate scheduler queue
            * progress (bool) - if True, print progress messages to stdout

        Returns:
            an iterable of Job instances that have not finished and must be enqueued again

        Raises:
            ResumeError: when tracker files are missing/invalid or a recorded state
                references a job without a submit request
        """
        if progress:
            print(
                f'resuming computations with QCG-PilotJob auxiliary directory from {path} ...'
            )

        track_reqs_file = join(path, 'track.reqs')
        if not exists(track_reqs_file) or not isfile(track_reqs_file):
            raise ResumeError(
                f'tracer - {track_reqs_file} not found or not a file')

        track_states_file = join(path, 'track.states')
        if exists(track_states_file) and not isfile(track_states_file):
            raise ResumeError(f'tracer - {track_states_file} is not a file')

        # each line of track.reqs holds one JSON-encoded job submit request
        job_requests = {}
        with open(track_reqs_file, "rt") as reqs_file:
            for req_line in reqs_file:
                job_req = json.loads(req_line)
                jname = job_req['name']
                if jname in job_requests:
                    raise ResumeError(
                        f'tracer - job {jname} contains duplicate entries in {path}'
                    )

                job_requests[jname] = Job(**job_req)

        _logger.info(f'tracker - read {len(job_requests)} job submit requests')
        if progress:
            print(f'read {len(job_requests)} previous job descriptions')

        # track.states is optional - it may be missing when no state change was recorded
        job_statuses = []
        if exists(track_states_file):
            with open(track_states_file, "rt") as states_file:
                for state_line in states_file:
                    job_statuses.append(json.loads(state_line))

        _logger.info(f'read {len(job_statuses)} job states')
        if progress:
            print(f'read {len(job_statuses)} job/iteration previous states')

        # start from all jobs; the finished ones are removed while replaying states
        jobs_to_enqueue = dict(job_requests)

        for job_status in job_statuses:
            jstate = JobState[job_status['state']]
            jname = job_status['task']
            jit = job_status['iteration']

            if jit is not None:
                # iteration entries carry the full iteration name - extract the parent job name
                jname, _ = JobList.parse_jobname(jname)

            if jname not in job_requests:
                raise ResumeError(
                    f'tracer - missing job {jname} submit request in {track_reqs_file}'
                )

            # NOTE(review): states are replayed even when not finished - an earlier
            # `jstate.is_finished()` sanity check was deliberately disabled here

            job = job_requests[jname]
            job.set_state(jstate, jit)

            # a whole-job entry (no iteration) in a finished state means the job
            # does not have to be enqueued again; iteration entries are only replayed
            if jit is None and jstate.is_finished():
                del jobs_to_enqueue[jname]

        _logger.info(f'job_requests length {len(job_requests)}')
        _logger.info(f'jobs_to_enqueue length {len(jobs_to_enqueue)}')

        # add all jobs to job list
        for job in job_requests.values():
            manager.job_list.add(job)

        _logger.info(
            f'tracker - found {len(job_requests)} total jobs with {len(jobs_to_enqueue)} unfinished'
        )

        return jobs_to_enqueue.values()
예제 #18
0
def test_slurmenv_simple_job():
    """Run a single 'date' job through the QCG-PilotJob service inside a Slurm
    allocation and verify its sandbox, output files and allocated nodes."""
    if not in_slurm_allocation() or get_num_slurm_nodes() < 2:
        pytest.skip(
            'test not run in slurm allocation or allocation is smaller than 2 nodes'
        )

    resources, allocation = get_slurm_resources_binded()
    resources_node_names = set(n.name for n in resources.nodes)

    set_pythonpath_to_qcg_module()
    tmpdir = str(tempfile.mkdtemp(dir=SHARED_PATH))

    file_path = join(tmpdir, 'jobs.json')
    print('tmpdir: {}'.format(tmpdir))

    jobName = 'mdate'
    jobs = [
        job.to_dict() for job in [
            Job(
                jobName,
                JobExecution('date',
                             wd=abspath(join(tmpdir, 'date.sandbox')),
                             stdout='date.out',
                             stderr='date.err'),
                JobResources(numCores=ResourceSize(1)))
        ]
    ]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    sys.argv = [
        'QCG-PilotJob', '--log', 'debug', '--file', '--file-path',
        str(file_path), '--wd', tmpdir, '--report-format', 'json'
    ]
    QCGPMService().start()

    jobEntries = check_job_status_in_json([jobName],
                                          workdir=tmpdir,
                                          dest_state='SUCCEED')
    # sandbox directory must exist, stdout must be non-empty
    assert all(
        (isdir(abspath(join(tmpdir, 'date.sandbox'))),
         exists(join(abspath(join(tmpdir, 'date.sandbox')), 'date.out')),
         exists(join(abspath(join(tmpdir, 'date.sandbox')), 'date.err')),
         stat(join(abspath(join(tmpdir, 'date.sandbox')), 'date.out')).st_size
         > 0))
    # there can be some debugging messages in the stderr
    #                stat(join(abspath(join(tmpdir, 'date.sandbox')), 'date.err')).st_size == 0))

    for jname, jentry in jobEntries.items():
        assert all(('runtime' in jentry, 'allocation'
                    in jentry.get('runtime', {})))

        # allocation format: comma-separated 'node[core:core:...]' entries
        jalloc = jentry['runtime']['allocation']
        for jalloc_node in jalloc.split(','):
            node_name = jalloc_node[:jalloc_node.index('[')]
            print('{} in available nodes ({})'.format(
                node_name, ','.join(resources_node_names)))
            # fixed broken format string: the closing parenthesis was missing
            assert node_name in resources_node_names, '{} not in nodes ({})'.format(
                node_name, ','.join(resources_node_names))

    with pytest.raises(ValueError):
        check_job_status_in_json([jobName + 'xxx'],
                                 workdir=tmpdir,
                                 dest_state='SUCCEED')

    rmtree(tmpdir)
예제 #19
0
def test_slurmenv_many_nodes_many_cores():
    """Run an MPI 'hostname' job spanning all nodes/cores of a Slurm allocation
    and verify the allocation layout and the produced output."""
    if not in_slurm_allocation() or get_num_slurm_nodes() < 2:
        pytest.skip(
            'test not run in slurm allocation or allocation is smaller than 2 nodes'
        )

    resources, allocation = get_slurm_resources_binded()
    resources_node_names = set(n.name for n in resources.nodes)

    set_pythonpath_to_qcg_module()
    tmpdir = str(tempfile.mkdtemp(dir=SHARED_PATH))

    file_path = join(tmpdir, 'jobs.json')
    print('tmpdir: {}'.format(tmpdir))

    jobName = 'hostname'
    jobwdir_base = 'hostname.sandbox'
    cores_num = resources.nodes[0].free
    nodes_num = resources.total_nodes
    jobs = [
        job.to_dict() for job in [
            Job(
                jobName,
                JobExecution(exec='mpirun',
                             args=['--allow-run-as-root', 'hostname'],
                             wd=abspath(join(tmpdir, jobwdir_base)),
                             stdout='hostname.out',
                             stderr='hostname.err',
                             modules=['mpi/openmpi-x86_64']),
                JobResources(numCores=ResourceSize(cores_num),
                             numNodes=ResourceSize(nodes_num)))
        ]
    ]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    sys.argv = [
        'QCG-PilotJob', '--log', 'debug', '--file', '--file-path',
        str(file_path), '--wd', tmpdir, '--report-format', 'json'
    ]
    QCGPMService().start()

    jobEntries = check_job_status_in_json([jobName],
                                          workdir=tmpdir,
                                          dest_state='SUCCEED')
    # sandbox directory must exist, stdout must be non-empty
    assert all(
        (isdir(abspath(join(tmpdir, jobwdir_base))),
         exists(join(abspath(join(tmpdir, jobwdir_base)), 'hostname.out')),
         exists(join(abspath(join(tmpdir, jobwdir_base)), 'hostname.err')),
         stat(join(abspath(join(tmpdir, jobwdir_base)),
                   'hostname.out')).st_size > 0))

    job_nodes = []
    allocated_cores = 0
    for jname, jentry in jobEntries.items():
        assert all(('runtime' in jentry, 'allocation'
                    in jentry.get('runtime', {})))

        # allocation format: comma-separated 'node[core:core:...]' entries
        jalloc = jentry['runtime']['allocation']
        for jalloc_node in jalloc.split(','):
            node_name = jalloc_node[:jalloc_node.index('[')]
            job_nodes.append(node_name)
            print('{} in available nodes ({})'.format(
                node_name, ','.join(resources_node_names)))
            # fixed broken format string: the closing parenthesis was missing
            assert node_name in resources_node_names, '{} not in nodes ({})'.format(
                node_name, ','.join(resources_node_names))

            ncores = len(jalloc_node[jalloc_node.index('[') + 1:-1].split(':'))
            print('#{} cores on node {}'.format(ncores, node_name))
            allocated_cores += ncores
    assert len(job_nodes) == nodes_num, str(job_nodes)
    assert allocated_cores == nodes_num * cores_num, allocated_cores

    # check if hostname is in stdout in two lines
    with open(abspath(join(tmpdir, join(jobwdir_base, 'hostname.out'))),
              'rt') as stdout_file:
        stdout_content = [line.rstrip() for line in stdout_file.readlines()]
    assert len(stdout_content) == nodes_num * cores_num, str(stdout_content)
    assert all(hostname in job_nodes
               for hostname in stdout_content), str(stdout_content)

    with pytest.raises(ValueError):
        check_job_status_in_json([jobName + 'xxx'],
                                 workdir=tmpdir,
                                 dest_state='SUCCEED')

    rmtree(tmpdir)