import json
import os
import re
import shutil
import sys
import time
from datetime import datetime, timedelta
from json import JSONDecodeError
from os import listdir
from os.path import abspath, exists, isdir, join
from shutil import rmtree

import pytest

# NOTE: the QCG-PilotJob module paths below are assumed from the project's
# source layout; adjust them if the package structure differs. The helpers
# save_reqs_to_file and submit_2_manager_and_wait_4_info are expected to come
# from the project's shared test utilities.
from qcg.pilotjob.api.job import Jobs
from qcg.pilotjob.api.manager import LocalManager
from qcg.pilotjob.joblist import (Job, JobDependencies, JobExecution,
                                  JobResources, ResourceSize)
from qcg.pilotjob.service import QCGPMService
from qcg.pilotjob.utils.auxdir import (find_aux_dirs, find_latest_aux_dir,
                                       find_single_aux_dir)
from qcg.pilotjob.tests.utils import (save_reqs_to_file,
                                      submit_2_manager_and_wait_4_info)


# helper method of a service test class: reads the address the service
# publishes in its auxiliary directory
def __checkServiceDir(self, wd):
    wdir = find_single_aux_dir(wd)
    if exists(wdir) and exists(join(wdir, 'address')):
        with open(join(wdir, 'address'), 'r') as f:
            return f.read()
    return None
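# Hedged usage sketch: __checkServiceDir belongs to a test-helper class (not
# shown here) and would be called after the service starts, e.g. from inside
# that class:
#
#     address = self.__checkServiceDir(str(tmpdir))
#     assert address, 'service did not publish its address file'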
def test_resume_tracker_files(tmpdir):
    # initialized before the try block so the finally clause is safe even if
    # starting the manager fails
    m = None

    try:
        m = LocalManager(['--log', 'debug', '--wd', str(tmpdir), '--report-format', 'json',
                          '--nodes', '4'], {'wdir': str(tmpdir)})

        job_req = {
            'name': 'host',
            'execution': {
                'exec': '/bin/date',
                'stdout': 'out',
            },
            'resources': {
                'numCores': {
                    'exact': 1
                }
            }
        }
        jobs = Jobs().add_std(job_req)
        submit_2_manager_and_wait_4_info(m, jobs, 'SUCCEED')

        time.sleep(1)

        aux_dir = find_single_aux_dir(str(tmpdir))
        print(f'aux_dir content: {str(listdir(aux_dir))}')

        assert all(exists(join(aux_dir, fname)) for fname in ['track.reqs', 'track.states']), \
            f"missing tracker files in {aux_dir}: {str(listdir(aux_dir))}"
    finally:
        if m:
            m.finish()
            m.cleanup()

        rmtree(str(tmpdir))
def check_service_log_string(resubstr, workdir='.'):
    service_log_file = join(find_single_aux_dir(workdir), 'service.log')

    regex = re.compile(resubstr)
    with open(service_log_file, 'r') as log_f:
        for line in log_f:
            if regex.search(line):
                return True

    return False
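# Hedged usage sketch for check_service_log_string: after a service run in
# `tmpdir`, a test could assert that the log recorded a given message; the
# regular expression below is illustrative, not a string the service is
# guaranteed to emit.
#
#     assert check_service_log_string(r'finished', workdir=str(tmpdir))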
def test_local_manager_submit_simple(tmpdir):
    cores = 4

    # client-side logs go by default to the api.log file in the client directory
    m = LocalManager(['--wd', str(tmpdir), '--nodes', str(cores)], {'wdir': str(tmpdir)})

    try:
        res = m.resources()
        assert all(('total_nodes' in res,
                    'total_cores' in res,
                    res['total_nodes'] == 1,
                    res['total_cores'] == cores))

        ids = m.submit(Jobs()
                       .add(name='host', exec='/bin/hostname', args=['--fqdn'], stdout='host.stdout')
                       .add(name='date', exec='/bin/date', stdout='date.stdout', numCores={'exact': 2}))
        assert len(m.list()) == 2

        m.wait4(ids)

        jinfos = m.info(ids)
        assert all(('jobs' in jinfos,
                    len(jinfos['jobs'].keys()) == 2,
                    'host' in jinfos['jobs'],
                    'date' in jinfos['jobs'],
                    jinfos['jobs']['host'].get('data', {}).get('status', '') == 'SUCCEED',
                    jinfos['jobs']['date'].get('data', {}).get('status', '') == 'SUCCEED'))

        aux_dir = find_single_aux_dir(str(tmpdir))
        assert all((exists(tmpdir.join('.qcgpjm-client', 'api.log')),
                    exists(join(aux_dir, 'service.log')),
                    exists(tmpdir.join('host.stdout')),
                    exists(tmpdir.join('date.stdout'))))
    finally:
        m.finish()
        # m.stopManager()
        m.cleanup()
def check_job_status_in_json(jobs, workdir='.', dest_state='SUCCEED'):
    to_check = set(jobs)
    entries = {}

    report_path = join(find_single_aux_dir(workdir), 'jobs.report')
    with open(report_path, 'r') as report_f:
        for line, entry in enumerate(report_f, 1):
            try:
                job_entry = json.loads(entry)
            except JSONDecodeError as e:
                raise ValueError('wrong jobs.report {} file format: error in {} line: {}'.format(
                    report_path, line, str(e)))

            for attr in ['name', 'state']:
                if attr not in job_entry:
                    raise ValueError('wrong jobs.report {} file format: missing \'{}\' attribute'.format(
                        report_path, attr))

            jname = job_entry['name']
            state = job_entry['state']
            if jname in to_check:
                msg = job_entry.get('message', None)
                if state != dest_state:
                    raise ValueError('job\'s {} incorrect status \'{}\' vs expected \'{}\' {}'.format(
                        jname, state, dest_state, '(' + msg + ')' if msg else ''))

                to_check.remove(jname)
                entries[jname] = job_entry

            if len(to_check) == 0:
                break

    if len(to_check) > 0:
        raise ValueError('missing {} jobs in report'.format(','.join(to_check)))

    return entries
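# For reference, a hedged sketch of a jobs.report entry as the helper above and
# the tests below parse it: one JSON object per line with at least 'name' and
# 'state' (optionally 'message'), plus 'runtime.rtime' and a 'history' of state
# transitions used by the timing checks. All field values here are illustrative.
_EXAMPLE_REPORT_ENTRY = json.loads(
    '{"name": "host", "state": "SUCCEED",'
    ' "runtime": {"rtime": "0:00:02.003150"},'
    ' "history": [{"state": "EXECUTING", "date": "2020-01-01T12:00:00.000001"},'
    '             {"state": "SUCCEED", "date": "2020-01-01T12:00:02.003151"}]}')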
def test_local_workflows(tmpdir):
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    jobs = [job.to_dict() for job in [
        Job('first',
            JobExecution('sleep', args=['2s'],
                         wd=abspath(tmpdir.join('first.sandbox')),
                         stdout='out', stderr='err'),
            JobResources(numCores=ResourceSize(1))),
        Job('second',
            JobExecution('sleep', args=['1s'],
                         wd=abspath(tmpdir.join('second.sandbox')),
                         stdout='out', stderr='err'),
            JobResources(numCores=ResourceSize(1)),
            dependencies=JobDependencies(after=['first'])),
        Job('third',
            JobExecution('date',
                         wd=abspath(tmpdir.join('third.sandbox')),
                         stdout='out', stderr='err'),
            JobResources(numCores=ResourceSize(1)),
            dependencies=JobDependencies(after=['first', 'second']))
    ]]
    reqs = [{'request': 'submit', 'jobs': jobs},
            {'request': 'control', 'command': 'finishAfterAllTasksDone'}]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    # the amount of resources should be enough to theoretically start all three jobs at once
    sys.argv = ['QCG-PilotJob', '--file', '--file-path', str(file_path), '--nodes', '4',
                '--wd', str(tmpdir), '--report-format', 'json']
    QCGPMService().start()

    jnames = ['first', 'second', 'third']
    check_job_status_in_json(jnames, workdir=str(tmpdir), dest_state='SUCCEED')
    for jname in jnames:
        sandbox = abspath(tmpdir.join('{}.sandbox'.format(jname)))
        assert all((isdir(sandbox),
                    exists(join(sandbox, 'out')),
                    exists(join(sandbox, 'err'))))

    with open(join(find_single_aux_dir(str(tmpdir)), 'jobs.report'), 'r') as f:
        job_stats = [json.loads(line) for line in f.readlines()]

    assert len(job_stats) == len(jnames)

    jstats = {}
    for i in range(0, len(jnames)):
        job = job_stats[i]
        print('read job stats: {}'.format(str(job)))

        t = datetime.strptime(job['runtime']['rtime'], "%H:%M:%S.%f")
        rtime = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second,
                          microseconds=t.microsecond)

        # find the time the job started executing
        exec_state = list(filter(lambda st_en: st_en['state'] == 'EXECUTING', job['history']))
        assert len(exec_state) == 1

        # find the time the job finished
        finish_state = list(filter(lambda st_en: st_en['state'] == 'SUCCEED', job['history']))
        assert len(finish_state) == 1

        start_time = datetime.strptime(exec_state[0]['date'], '%Y-%m-%dT%H:%M:%S.%f')
        finish_time = datetime.strptime(finish_state[0]['date'], '%Y-%m-%dT%H:%M:%S.%f')
        jstats[job['name']] = {'r_time': rtime, 's_time': start_time, 'f_time': finish_time}

    # assert the second job started after the first one finished
    assert jstats['second']['s_time'] > jstats['first']['f_time']

    # assert the third job started after both the first and the second ones finished
    assert all((jstats['third']['s_time'] > jstats['first']['f_time'],
                jstats['third']['s_time'] > jstats['second']['f_time']))

    rmtree(str(tmpdir))
def test_local_iter_scheduling_job_large(tmpdir):
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    jobName = "sleep-iter"
    nits = 20
    jobSleepTime = 2
    jobCores = 2
    availCores = 10
    # each round runs availCores / jobCores == 5 iterations concurrently, so the
    # 20 iterations need 20 * 2 / 10 == 4 rounds of about jobSleepTime each,
    # i.e. roughly 8 seconds in total
    rounds = nits * jobCores / availCores
    totalExecTime = rounds * jobSleepTime

    jobs = [{
        "name": jobName,
        "iteration": {"stop": nits},
        "execution": {
            "exec": "/bin/sleep",
            "args": ["{}s".format(str(jobSleepTime))],
            "wd": abspath(tmpdir.join("{}_$${{it}}".format(jobName))),
            "stdout": "sleep-iter.stdout",
            "stderr": "sleep-iter.stderr"
        },
        "resources": {
            "numCores": {
                "exact": jobCores,
            }
        }
    }]
    reqs = [{'request': 'submit', 'jobs': jobs},
            {'request': 'control', 'command': 'finishAfterAllTasksDone'}]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    sys.argv = ['QCG-PilotJob', '--log', 'debug', '--file', '--file-path', str(file_path),
                '--nodes', str(availCores), '--wd', str(tmpdir), '--report-format', 'json']
    QCGPMService().start()

    check_job_status_in_json([jobName] + ["{}:{}".format(jobName, i) for i in range(0, nits)],
                             workdir=str(tmpdir), dest_state='SUCCEED')
    for i in range(0, nits):
        wd_path = abspath(tmpdir.join("{}_{}".format(jobName, i)))
        stdout_path = join(wd_path, 'sleep-iter.stdout')
        stderr_path = join(wd_path, 'sleep-iter.stderr')
        assert all((isdir(wd_path), exists(stdout_path), exists(stderr_path))), \
            "stdout({}) and/or stderr({}) doesn't exist".format(stdout_path, stderr_path)

    with open(join(find_single_aux_dir(str(tmpdir)), 'jobs.report'), 'r') as f:
        job_stats = [json.loads(line) for line in f.readlines()]

    assert len(job_stats) == nits + 1

    min_start, max_finish = None, None

    for i in range(0, nits):
        job = job_stats[i]
        print('read job stats: {}'.format(str(job)))

        t = datetime.strptime(job['runtime']['rtime'], "%H:%M:%S.%f")
        rtime = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second,
                          microseconds=t.microsecond)
        assert all((rtime.total_seconds() > jobSleepTime,
                    rtime.total_seconds() < jobSleepTime + 2)), \
            "job {} runtime exceeded assumed value {}s vs max {}s".format(
                i, rtime.total_seconds(), jobSleepTime + 2)

        # find the time the iteration started executing
        exec_state = list(filter(lambda st_en: st_en['state'] == 'EXECUTING', job['history']))
        assert len(exec_state) == 1

        # find the time the iteration finished
        finish_state = list(filter(lambda st_en: st_en['state'] == 'SUCCEED', job['history']))
        assert len(finish_state) == 1

        start_time = datetime.strptime(exec_state[0]['date'], '%Y-%m-%dT%H:%M:%S.%f')
        finish_time = datetime.strptime(finish_state[0]['date'], '%Y-%m-%dT%H:%M:%S.%f')

        if not min_start or start_time < min_start:
            min_start = start_time

        if not max_finish or finish_time > max_finish:
            max_finish = finish_time

    assert all((min_start, max_finish))

    # check that the span from the first iteration's start to the last iteration's
    # finish is about `rounds` rounds of jobSleepTime each
    scenario_duration = max_finish - min_start
    assert all((scenario_duration.total_seconds() > totalExecTime,
                scenario_duration.total_seconds() < totalExecTime + 4)), \
        "scenario duration runtime exceeded assumed value {}s vs max {}s".format(
            scenario_duration.total_seconds(), totalExecTime + 4)

    rmtree(str(tmpdir))
def test_aux_dir_find(tmpdir):
    # in a fresh directory there should be no aux dirs
    assert len(find_aux_dirs(tmpdir)) == 0

    # a directory whose name does not start with .qcgpjm
    dname = 'somepath.qcgpjm'
    os.makedirs(tmpdir.join(dname))
    assert len(find_aux_dirs(tmpdir)) == 0
    shutil.rmtree(tmpdir.join(dname))

    # a valid directory without a postfix
    dname = '.qcgpjm-service-'
    os.makedirs(tmpdir.join(dname))
    assert len(find_aux_dirs(tmpdir)) == 1 and \
        find_aux_dirs(tmpdir)[0] == os.path.abspath(tmpdir.join(dname))
    shutil.rmtree(tmpdir.join(dname))

    # a file, not a directory
    dname = '.qcgpjm-service-'
    open(str(tmpdir.join(dname)), 'w+').close()
    assert os.path.isfile(str(tmpdir.join(dname)))
    assert len(find_aux_dirs(tmpdir)) == 0
    os.remove(tmpdir.join(dname))

    # not a top-level directory
    subdir = 'subdirectory'
    dname = '{}/.qcgpjm-service'.format(subdir)
    os.makedirs(tmpdir.join(dname))
    assert len(find_aux_dirs(tmpdir)) == 0
    shutil.rmtree(tmpdir.join(subdir))

    # many directories with postfixes
    dnames = ['.qcgpjm-service-governor-0', '.qcgpjm-service-manager-0',
              '.qcgpjm-service-manager-1']
    for dname in dnames:
        os.makedirs(tmpdir.join(dname))
    aux_dirs = find_aux_dirs(tmpdir)
    assert len(aux_dirs) == len(dnames)
    assert all((os.path.abspath(str(tmpdir.join(dname))) in aux_dirs for dname in dnames))
    for dname in dnames:
        shutil.rmtree(str(tmpdir.join(dname)))

    # no auxiliary directories
    with pytest.raises(Exception):
        find_single_aux_dir(str(tmpdir))

    # too many directories
    dnames = ['.qcgpjm-service-governor-0', '.qcgpjm-service-manager-0',
              '.qcgpjm-service-manager-1']
    for dname in dnames:
        os.makedirs(tmpdir.join(dname))
    with pytest.raises(Exception):
        find_single_aux_dir(str(tmpdir))
    for dname in dnames:
        shutil.rmtree(str(tmpdir.join(dname)))

    # just one auxiliary dir
    dname = '.qcgpjm-service-0'
    os.makedirs(tmpdir.join(dname))
    assert find_single_aux_dir(str(tmpdir)) == os.path.abspath(str(tmpdir.join(dname)))
    shutil.rmtree(tmpdir.join(dname))

    # the last modified (last created) auxiliary dir; the sleep keeps the
    # directories' modification times distinct
    dnames = ['.qcgpjm-service-manager-1', '.qcgpjm-service-manager-3',
              '.qcgpjm-service-manager-2']
    for dname in dnames:
        os.makedirs(tmpdir.join(dname))
        time.sleep(1)
    with pytest.raises(Exception):
        find_single_aux_dir(str(tmpdir))
    assert find_latest_aux_dir(tmpdir) == os.path.abspath(str(tmpdir.join(dnames[-1])))
    for dname in dnames:
        shutil.rmtree(str(tmpdir.join(dname)))
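# A hedged reference sketch of the aux-dir helpers exercised above, written only
# from the behaviour this test asserts (the real implementations live in
# QCG-PilotJob itself): only top-level directories whose names start with
# '.qcgpjm-service' count, files and nested directories do not, a single aux
# dir is required by the find_single_* variant, and 'latest' means most
# recently modified. The _sketch names are hypothetical.
def _find_aux_dirs_sketch(path):
    apath = os.path.abspath(str(path))
    return [os.path.join(apath, name) for name in os.listdir(apath)
            if name.startswith('.qcgpjm-service') and os.path.isdir(os.path.join(apath, name))]


def _find_single_aux_dir_sketch(path):
    aux_dirs = _find_aux_dirs_sketch(path)
    if len(aux_dirs) != 1:
        raise Exception('expected exactly one aux dir, found {}'.format(len(aux_dirs)))
    return aux_dirs[0]


def _find_latest_aux_dir_sketch(path):
    return max(_find_aux_dirs_sketch(path), key=os.path.getmtime)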