def submit_calculation(task, input_, cluster, image, run_parameters,
                       root_folder, container_description, input_folder,
                       output_folder, scratch_folder):
    """Stage the calculation inputs on the cluster, submit the job and
    hand off to the monitoring task, which triggers postprocessing once
    the job completes."""
    token = task.taskflow.girder_token
    log = task.taskflow.logger

    calc_job = _create_job(task, cluster, image, run_parameters,
                           container_description, input_folder,
                           output_folder, scratch_folder)

    # Record which cluster this flow is running on.
    task.taskflow.set_metadata('cluster', cluster)

    # Stage the job input folders on the cluster before submitting.
    log.info('Downloading input files to cluster.')
    download_job_input_folders(cluster, calc_job, girder_token=token,
                               submit=False)
    log.info('Downloading complete.')

    log.info('Submitting job %s to cluster.' % calc_job['_id'])
    submit_job(cluster, calc_job, girder_token=token, monitor=False)

    # Poll the job; run the postprocessing step when it finishes.
    monitor_job.apply_async(
        (cluster, calc_job),
        {'girder_token': token, 'monitor_interval': 10},
        link=postprocess_job.s(input_, cluster, image, run_parameters,
                               root_folder, container_description,
                               input_folder, output_folder, scratch_folder,
                               calc_job))
def submit(task, input_, cluster, run_folder, input_file, input_folder):
    """Create the job, stage its input folders on the cluster, submit it
    and start monitoring/postprocessing.

    Fixes over the previous version:
    - removed a duplicate ``girder_token`` assignment;
    - replaced the bare ``except:`` + ``traceback.print_exc()`` with a
      narrowed ``except Exception`` that logs through the taskflow logger
      (the original best-effort behavior — monitoring still starts — is
      preserved).
    """
    job = _create_job(task, cluster, input_file, input_folder)
    girder_token = task.taskflow.girder_token
    task.taskflow.set_metadata('cluster', cluster)

    # Now download and submit job to the cluster
    task.taskflow.logger.info('Downloading input files to cluster.')
    download_job_input_folders(cluster, job, girder_token=girder_token,
                               submit=False)
    task.taskflow.logger.info('Downloading complete.')

    task.taskflow.logger.info('Submitting job %s to cluster.' % job['_id'])
    try:
        submit_job(cluster, job, girder_token=girder_token, monitor=False)
    except Exception:
        # Best-effort: log the failure but still start the monitor, which
        # will surface the job's true state to the taskflow.
        task.taskflow.logger.exception('Job submission failed.')

    monitor_job.apply_async(
        (cluster, job),
        {'girder_token': girder_token, 'monitor_interval': 10},
        link=postprocess.s(run_folder, input_, cluster, job))
def submit_paraview_job(task, cluster, job, *args, **kwargs):
    """Build the ParaView job parameters from the caller's options and the
    cluster configuration, then submit the job and start monitoring it."""
    task.taskflow.logger.info('Submitting job to cluster.')
    token = task.taskflow.girder_token

    params = {}

    # Straight pass-through options from the caller.
    for option in ('dataDir', 'fileName', 'simulationJobId',
                   'numberOfSlots'):
        if option in kwargs:
            params[option] = kwargs[option]

    if 'sessionKey' in kwargs:
        params['sessionKey'] = kwargs['sessionKey']
        # Save the sessionKey so we can clean up the proxy entry
        task.taskflow.set_metadata('sessionKey', kwargs['sessionKey'])

    pe_match = parse('config.parallelEnvironment').find(cluster)
    if pe_match:
        params['parallelEnvironment'] = pe_match[0].value

    output_dir = get_cluster_job_output_dir(cluster)
    if output_dir:
        params['jobOutputDir'] = output_dir

    install_match = parse('config.paraview.installDir').find(cluster)
    if install_match:
        params['paraviewInstallDir'] = install_match[0].value

    # Does the cluster have GPUs?
    params['gpu'] = (has_gpus(cluster)
                     or kwargs.get('numberOfGpusPerNode', 0) > 0)

    job['params'] = params

    # Create proxy entry
    if cluster['type'] == 'ec2':
        create_proxy_entry(task, cluster, job)

    # Before we submit the job upload any file we may have been given
    upload_input(task, cluster, job, *args, **kwargs)

    submit_job(cluster, job, log_write_url=None, girder_token=token,
               monitor=False)

    monitor_paraview_job.delay(cluster, job, *args, **kwargs)
def submit_pyfr_job(task, cluster, job, *args, **kwargs):
    """Fold the caller's keyword options into the PyFR job parameters,
    submit the job and kick off monitoring."""
    task.logger.info('Submitting job %s to cluster.' % job['_id'])
    token = task.taskflow.girder_token

    # Every keyword argument becomes a job parameter.
    job['params'].update(kwargs)

    submit_job(cluster, job, log_write_url=None, girder_token=token,
               monitor=False)

    monitor_pyfr_job.delay(cluster, job, *args, **kwargs)
def submit_paraview_job(task, cluster, job, *args, **kwargs):
    """Assemble the ParaView job parameters (always a single-slot run),
    then submit the job and start monitoring it."""
    task.taskflow.logger.info('Submitting job to cluster.')
    token = task.taskflow.girder_token

    params = {}

    # Straight pass-through options from the caller.
    for option in ('dataDir', 'fileName', 'simulationJobId'):
        if option in kwargs:
            params[option] = kwargs[option]

    if 'sessionKey' in kwargs:
        params['sessionKey'] = kwargs['sessionKey']
        # Save the sessionKey so we can clean up the proxy entry
        task.taskflow.set_metadata('sessionKey', kwargs['sessionKey'])

    pe_match = parse('config.parallelEnvironment').find(cluster)
    if pe_match:
        params['parallelEnvironment'] = pe_match[0].value

    # This variant always runs ParaView on a single slot.
    params['numberOfSlots'] = 1

    output_dir = get_cluster_job_output_dir(cluster)
    if output_dir:
        params['jobOutputDir'] = output_dir

    install_match = parse('config.paraview.installDir').find(cluster)
    if install_match:
        params['paraviewInstallDir'] = install_match[0].value

    # Does the cluster have GPUs?
    params['gpu'] = (has_gpus(cluster)
                     or kwargs.get('numberOfGpusPerNode', 0) > 0)

    job['params'] = params

    # Create proxy entry
    if cluster['type'] == 'ec2':
        create_proxy_entry(task, cluster, job)

    # Before we submit the job upload any file we may have been given
    upload_input(task, cluster, job, *args, **kwargs)

    submit_job(cluster, job, log_write_url=None, girder_token=token,
               monitor=False)

    monitor_paraview_job.delay(cluster, job, *args, **kwargs)
def start(task, input_, user, cluster, image, run_parameters):
    """Entry point of the container taskflow.

    The flow is the following:
    - Dry run the container with the -d flag to obtain a description of
      the input/output formats
    - Convert the cjson input geometry to conform to the container's
      expected format
    - Run the container
    - Convert the container output format into cjson
    - Ingest the output in the database
    """
    token = task.taskflow.girder_token
    client = create_girder_client(task.taskflow.girder_api_url, token)

    # 'cori' is resolved to a concrete cluster record.
    if cluster.get('name') == 'cori':
        cluster = get_cori(client)

    if '_id' not in cluster:
        log_and_raise(task, 'Invalid cluster configurations: %s' % cluster)

    oc_folder = get_oc_folder(client)
    timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f")
    root_folder = client.createFolder(oc_folder['_id'], timestamp)

    # temporary folder to save the container in/out description
    description_folder = client.createFolder(root_folder['_id'],
                                             'description')

    job = _create_description_job(task, cluster, description_folder, image,
                                  run_parameters)

    task.taskflow.logger.info(
        'Preparing job to obtain the container description.')
    download_job_input_folders(cluster, job, girder_token=token,
                               submit=False)

    task.taskflow.logger.info(
        'Submitting job to obtain the container description.')
    submit_job(cluster, job, girder_token=token, monitor=False)

    # Monitor the description job; postprocess once it finishes.
    monitor_job.apply_async(
        (cluster, job),
        {'girder_token': token, 'monitor_interval': 10},
        countdown=countdown(cluster),
        link=postprocess_description.s(input_, user, cluster, image,
                                       run_parameters, root_folder, job,
                                       description_folder))
def submit_nwchem_job(task, upstream_result):
    """Submit the NWChem job described by *upstream_result* and pass the
    result dict through unchanged for the next task in the chain.

    Every key other than 'cluster' and 'job' is treated as an extra job
    parameter.
    """
    job = upstream_result['job']
    cluster = upstream_result['cluster']

    task.logger.info('Submitting job %s to cluster.' % job['_id'])

    # Fold everything except the bookkeeping keys into the job parameters.
    extra_params = {key: value for key, value in upstream_result.items()
                    if key not in ('cluster', 'job')}
    job['params'].update(extra_params)

    submit_job(cluster, job, log_write_url=None,
               girder_token=task.taskflow.girder_token, monitor=False)

    return upstream_result
def start(task, user, cluster, image, container):
    """Create the pull-job folder, upload the pull.py helper script to it
    and submit the job that pulls the container image on the cluster."""
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # 'cori' is resolved to a concrete cluster record.
    if cluster.get('name') == 'cori':
        cluster = get_cori(client)

    if '_id' not in cluster:
        log_and_raise(task, 'Invalid cluster configurations: %s' % cluster)

    oc_folder = get_oc_folder(client)
    timestamp = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f")
    root_folder = client.createFolder(oc_folder['_id'], timestamp)

    # temporary folder to save the container in/out description
    pull_folder = client.createFolder(root_folder['_id'], 'pull_folder')

    # save the pull.py script to the job directory
    script_path = os.path.join(os.path.dirname(__file__), 'utils/pull.py')
    with open(script_path, 'rb') as f:
        client.uploadFile(pull_folder['_id'], f, 'pull.py',
                          os.path.getsize(script_path),
                          parentType='folder')

    job = _create_job(task, cluster, pull_folder, image, container)

    # Now download pull.py script to the cluster
    task.taskflow.logger.info('Preparing job to pull the container.')
    download_job_input_folders(cluster, job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)

    task.taskflow.logger.info('Submitting job to pull the container.')
    submit_job(cluster, job, girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async(
        (cluster, job),
        {'girder_token': task.taskflow.girder_token,
         'monitor_interval': 10},
        countdown=countdown(cluster),
        link=postprocess_job.s(user, cluster, image, job, pull_folder,
                               container))
def submit_paraview_job(task, cluster, job, *args, **kwargs):
    """Assemble the ParaView job parameters from the caller's options and
    the cluster configuration, then submit the job and monitor it."""
    task.taskflow.logger.info('Submitting job to cluster.')
    token = task.taskflow.girder_token

    params = {}

    # Straight pass-through options from the caller.
    for option in ('dataDir', 'fileName', 'simulationJobId'):
        if option in kwargs:
            params[option] = kwargs[option]

    if 'sessionKey' in kwargs:
        params['sessionKey'] = kwargs['sessionKey']
        # Save the sessionKey so we can clean up the proxy entry
        task.taskflow.set_metadata('sessionKey', kwargs['sessionKey'])

    pe_match = parse('config.parallelEnvironment').find(cluster)
    if pe_match:
        params['parallelEnvironment'] = pe_match[0].value

    output_dir = get_cluster_job_output_dir(cluster)
    if output_dir:
        params['jobOutputDir'] = output_dir

    install_match = parse('config.paraview.installDir').find(cluster)
    if install_match:
        params['paraviewInstallDir'] = install_match[0].value

    job['params'] = params

    # Before we submit the job upload any file we may have been given
    upload_input(task, cluster, job, *args, **kwargs)

    submit_job(cluster, job, log_write_url=None, girder_token=token,
               monitor=False)

    monitor_paraview_job.delay(cluster, job, *args, **kwargs)
def submit_open_foam_job(task, cluster, job, *args, **kwargs):
    """Stage the OpenFOAM job inputs on the cluster, fill in the scheduler
    parameters and submit the job, then hand off to monitoring."""
    token = task.taskflow.girder_token

    # Now download job inputs
    task.logger.info('Uploading input files to cluster.')
    download_job_input_folders(cluster, job, log_write_url=None,
                               girder_token=token, submit=False)
    task.logger.info('Uploading complete.')

    # Setup job parameters
    task.taskflow.logger.info('Submitting job to cluster.')
    params = {'numberOfSlots': 1}

    ## parallel_environment
    pe_match = parse('config.parallelEnvironment').find(cluster)
    if pe_match:
        params['parallelEnvironment'] = pe_match[0].value

    ## output dir
    output_dir = get_cluster_job_output_dir(cluster)
    if output_dir:
        params['jobOutputDir'] = output_dir

    job['params'] = params

    # Submit job to the queue
    submit_job(cluster, job, log_write_url=None, girder_token=token,
               monitor=False)

    # Move to the next task
    monitor_open_foam_job.delay(cluster, job, *args, **kwargs)
def start(task, input_, cluster, image, run_parameters):
    """Entry point of the container taskflow.

    The flow is the following:
    - Dry run the container with the -d flag to obtain a description of
      the input/output formats
    - Convert the cjson input geometry to conform to the container's
      expected format
    - Run the container
    - Convert the container output format into cjson
    - Ingest the output in the database

    Fix: the upload result was previously bound to an unused local
    (``input_parameters_file``); it has been dropped.
    """
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    if cluster.get('name') == 'cori':
        cluster = _get_cori(client)

    if '_id' not in cluster:
        raise Exception('Invalid cluster configurations: %s' % cluster)

    oc_folder = _get_oc_folder(client)
    root_folder = client.createFolder(
        oc_folder['_id'],
        datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%f"))

    # temporary folder to save the container in/out description
    description_folder = client.createFolder(root_folder['_id'],
                                             'description')

    # save the pull.py script to the job directory
    with open(os.path.join(os.path.dirname(__file__), 'utils/pull.py'),
              'rb') as f:
        # Get the size of the file
        size = f.seek(0, 2)
        f.seek(0)
        name = 'pull.py'
        client.uploadFile(description_folder['_id'], f, name, size,
                          parentType='folder')

    job = _create_description_job(task, cluster, description_folder, image,
                                  run_parameters)

    # Now download pull.py script to the cluster
    task.taskflow.logger.info(
        'Downloading description input files to cluster.')
    download_job_input_folders(cluster, job,
                               girder_token=task.taskflow.girder_token,
                               submit=False)
    task.taskflow.logger.info('Downloading complete.')

    submit_job(cluster, job, girder_token=task.taskflow.girder_token,
               monitor=False)

    monitor_job.apply_async(
        (cluster, job),
        {'girder_token': task.taskflow.girder_token,
         'monitor_interval': 10},
        link=postprocess_description.s(input_, cluster, image,
                                       run_parameters, root_folder, job,
                                       description_folder))
def upload_output(task, _, cluster, job, *args, **kwargs): task.taskflow.logger.info('Uploading results from cluster') output_folder_id = kwargs['output']['folder']['id'] client = create_girder_client(task.taskflow.girder_api_url, task.taskflow.girder_token) # Refresh state of job job = client.get('jobs/%s' % job['_id']) job['output'] = [{'folderId': output_folder_id, 'path': '.'}] upload_job_output_to_folder(cluster, job, log_write_url=None, job_dir=None, girder_token=task.taskflow.girder_token) task.taskflow.logger.info('Upload job output complete.') mesh_file_id = kwargs.pop('meshFileId') solution_files = list(_list_solution_files(client, output_folder_id)) if len(solution_files) == 0: raise Exception( 'No solution files where produced, please check output files for errors.' ) # Generate and save the first vtu file that should be loaded for this # run. This can then be used to know which file to open as part of any viz # step. file_names = [f['name'] for f in solution_files] file_names.sort() if len(file_names) > 1: vtu_file_first = '%s.vtu' % file_names[0].rsplit('.', 1)[0] # find where to put the ...*... 
head = tail = size = len(vtu_file_first) for fileName in file_names[1:]: vtu_name = '%s.vtu' % fileName.rsplit('.', 1)[0] for i in range(size): if vtu_file_first[i] != vtu_name[i] and head > i: head = i # not included in the cut if vtu_file_first[-i] != vtu_name[-i] and tail >= i: tail = i - 1 vtu_file = '%s*%s' % (vtu_file_first[:head], vtu_file_first[-tail:]) else: vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0] task.taskflow.set_metadata('vtuFile', vtu_file) number_files = len(solution_files) # By default export solution files to VTK format using a set of batch jobs if not 'exportInTaskFlow' in kwargs or not kwargs['exportInTaskFlow']: number_of_jobs = kwargs['numberOfProcs'] task.logger.info('Generating %d export jobs' % number_of_jobs) sim_job_dir = job['dir'] jobs = [] job_index = 1 for chunk in [ solution_files[i::number_of_jobs] for i in xrange(number_of_jobs) ]: if chunk: name = 'pyfr_export_%d' % job_index mesh_filename = kwargs['meshFilename'] export_job = create_export_job(task, name, chunk, sim_job_dir, mesh_filename) submit_job(cluster, export_job, log_write_url=None, girder_token=task.taskflow.girder_token, monitor=False) jobs.append(export_job) job_index += 1 # Update the jobs list in the metadata task.taskflow.set_metadata('jobs', [j for j in jobs] + [job]) # Also save just the export job ids task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs]) monitor_jobs.apply_async( (cluster, jobs), {'girder_token': task.taskflow.girder_token}, link=upload_export_output.s(cluster, job, *args, **kwargs)) else: # The number 100 is pretty arbitrary! if number_files < 100: export_output.delay(output_folder_id, mesh_file_id, solution_files) # Break into chunks a run in parallel else: for chunk in [ solution_files[i::NUMBER__OF_EXPORT_TASKS] for i in xrange(NUMBER__OF_EXPORT_TASKS) ]: export_output.delay(output_folder_id, mesh_file_id, chunk)
def test_submit_job(self, get_connection, *args):
    """Exercise job.submit_job against a mocked SSH connection and a
    mocked REST API.

    Four scenarios are covered:
    1. ec2/sge cluster, no parallelEnvironment: slots are read via
       'qconf -sp orte'.
    2. ec2/sge cluster with parallelEnvironment 'mype': 'qconf -sp mype'
       is issued instead.
    3. trad/sge cluster, no parallelEnvironment: qconf must NOT run.
    4. trad/sge cluster with a parallelEnvironment: numberOfSlots is
       extracted from the qconf output onto the job model.

    NOTE(review): each conn.execute.side_effect list is consumed in call
    order (home-dir lookup, optional qconf, qsub) -- do not reorder.
    """
    cluster = {
        '_id': 'bob',
        'type': 'ec2',
        'name': 'dummy',
        'config': {
            '_id': 'dummy',
            'scheduler': {
                'type': 'sge'
            }
        },
    }
    job_id = 'dummy'
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{'tail': True, 'path': 'dummy/file/path'}]
    }

    # Canned 'qconf -sp' output advertising 10 slots.
    qconf_output = ['pe_name orte', 'slots 10\n', 'user_lists NONE',
                    'xuser_lists NONE', 'start_proc_args /bin/true',
                    'stop_proc_args /bin/true',
                    'allocation_rule $pe_slots', 'control_slaves FALSE',
                    'job_is_first_task TRUE', 'urgency_slots min',
                    'accounting_summary FALSE']

    qsub_output = ['Your job 74 ("test.sh") has been submitted']

    conn = get_connection.return_value.__enter__.return_value
    conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

    # HTTP handlers for the job status/log endpoints hit during submit.
    def _get_status(url, request):
        content = {
            'status': 'queued'
        }
        content = json.dumps(content).encode('utf8')
        headers = {
            'content-length': len(content),
            'content-type': 'application/json'
        }
        return httmock.response(200, content, headers, request=request)

    def _set_status(url, request):
        content = {
            'status': 'queued'
        }
        content = json.dumps(content).encode('utf8')
        headers = {
            'content-length': len(content),
            'content-type': 'application/json'
        }
        return httmock.response(200, content, headers, request=request)

    def _log(url, request):
        content = {
        }
        content = json.dumps(content).encode('utf8')
        headers = {
            'content-length': len(content),
            'content-type': 'application/json'
        }
        return httmock.response(200, content, headers, request=request)

    status_url = '/api/v1/jobs/%s/status' % job_id
    get_status = httmock.urlmatch(
        path=r'^%s$' % status_url, method='GET')(_get_status)

    status_update_url = '/api/v1/jobs/%s' % job_id
    set_status = httmock.urlmatch(
        path=r'^%s$' % status_update_url, method='PATCH')(_set_status)

    log_url = '/api/v1/jobs/%s/log' % job_id
    log = httmock.urlmatch(
        path=r'^%s$' % log_url, method='POST')(_log)

    # Scenario 1: ec2 cluster, default parallel environment ('orte').
    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    self.assertEqual(conn.execute.call_args_list[1],
                     mock.call('qconf -sp orte'),
                     'Unexpected qconf command: %s'
                     % str(conn.execute.call_args_list[0]))

    # Specifying and parallel environment
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{'tail': True, 'path': 'dummy/file/path'}],
        'params': {
            'parallelEnvironment': 'mype'
        }
    }

    qconf_output = ['pe_name mype', 'slots 10\n', 'user_lists NONE',
                    'xuser_lists NONE', 'start_proc_args /bin/true',
                    'stop_proc_args /bin/true',
                    'allocation_rule $pe_slots', 'control_slaves FALSE',
                    'job_is_first_task TRUE', 'urgency_slots min',
                    'accounting_summary FALSE']

    conn.reset_mock()
    conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    self.assertEqual(conn.execute.call_args_list[1],
                     mock.call('qconf -sp mype'),
                     'Unexpected qconf command: %s'
                     % str(conn.execute.call_args_list[0]))

    # For traditional clusters we shouldn't try to extract slot from orte
    cluster = {
        '_id': 'dummy',
        'type': 'trad',
        'name': 'dummy',
        'config': {
            'host': 'dummy',
            'ssh': {
                'user': '******',
                'passphrase': 'its a secret'
            },
            'scheduler': {
                'type': 'sge'
            }
        }
    }

    job_id = 'dummy'
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{'tail': True, 'path': 'dummy/file/path'}]
    }

    conn.reset_mock()
    # No qconf entry: only home-dir lookup and qsub are expected.
    conn.execute.side_effect = [['/home/test'],
                                ['Your job 74 ("test.sh") has been submitted']]

    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    # Assert that we don't try and get the number of slots
    self.assertFalse('qconf' in str(conn.execute.call_args_list),
                     'qconf should not be called')

    # For traditional clusters define a parallel env
    cluster = {
        '_id': 'dummy',
        'type': 'trad',
        'name': 'dummy',
        'config': {
            'host': 'dummy',
            'ssh': {
                'user': '******',
                'passphrase': 'its a secret'
            },
            'scheduler': {
                'type': 'sge'
            }
        }
    }
    job_id = 'dummy'
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{'tail': True, 'path': 'dummy/file/path'}],
        'params': {
            'parallelEnvironment': 'mype'
        }
    }

    conn.reset_mock()
    conn.execute.side_effect = [['/home/test'], qconf_output,
                                ['Your job 74 ("test.sh") has been submitted']]

    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    self.assertEqual(conn.execute.call_args_list[1],
                     mock.call('qconf -sp mype'))
    # The slot count advertised by qconf ends up on the job model.
    self.assertEqual(job_model['params']['numberOfSlots'], 10)
def test_submit_job(self, get_connection, *args):
    """Exercise job.submit_job against a mocked SSH connection and a
    mocked REST API (yapf-formatted variant of the same test).

    Scenarios: ec2/sge with default 'orte' PE, ec2/sge with an explicit
    'mype' PE, trad/sge without a PE (qconf must not run) and trad/sge
    with a PE (numberOfSlots extracted from qconf output).

    NOTE(review): each conn.execute.side_effect list is consumed in call
    order (home-dir lookup, optional qconf, qsub) -- do not reorder.
    """
    cluster = {
        '_id': 'bob',
        'type': 'ec2',
        'name': 'dummy',
        'config': {
            '_id': 'dummy',
            'scheduler': {
                'type': 'sge'
            }
        },
    }
    job_id = 'dummy'
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{
            'tail': True,
            'path': 'dummy/file/path'
        }]
    }

    # Canned 'qconf -sp' output advertising 10 slots.
    qconf_output = [
        'pe_name orte', 'slots 10\n', 'user_lists NONE',
        'xuser_lists NONE', 'start_proc_args /bin/true',
        'stop_proc_args /bin/true', 'allocation_rule $pe_slots',
        'control_slaves FALSE', 'job_is_first_task TRUE',
        'urgency_slots min', 'accounting_summary FALSE'
    ]

    qsub_output = ['Your job 74 ("test.sh") has been submitted']

    conn = get_connection.return_value.__enter__.return_value
    conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

    # HTTP handlers for the job status/log endpoints hit during submit.
    def _get_status(url, request):
        content = {'status': 'queued'}
        content = json.dumps(content).encode('utf8')
        headers = {
            'content-length': len(content),
            'content-type': 'application/json'
        }
        return httmock.response(200, content, headers, request=request)

    def _set_status(url, request):
        content = {'status': 'queued'}
        content = json.dumps(content).encode('utf8')
        headers = {
            'content-length': len(content),
            'content-type': 'application/json'
        }
        return httmock.response(200, content, headers, request=request)

    def _log(url, request):
        content = {}
        content = json.dumps(content).encode('utf8')
        headers = {
            'content-length': len(content),
            'content-type': 'application/json'
        }
        return httmock.response(200, content, headers, request=request)

    status_url = '/api/v1/jobs/%s/status' % job_id
    get_status = httmock.urlmatch(path=r'^%s$' % status_url,
                                  method='GET')(_get_status)

    status_update_url = '/api/v1/jobs/%s' % job_id
    set_status = httmock.urlmatch(path=r'^%s$' % status_update_url,
                                  method='PATCH')(_set_status)

    log_url = '/api/v1/jobs/%s/log' % job_id
    log = httmock.urlmatch(path=r'^%s$' % log_url, method='POST')(_log)

    # Scenario 1: ec2 cluster, default parallel environment ('orte').
    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    self.assertEqual(
        conn.execute.call_args_list[1], mock.call('qconf -sp orte'),
        'Unexpected qconf command: %s'
        % str(conn.execute.call_args_list[0]))

    # Specifying and parallel environment
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{
            'tail': True,
            'path': 'dummy/file/path'
        }],
        'params': {
            'parallelEnvironment': 'mype'
        }
    }

    qconf_output = [
        'pe_name mype', 'slots 10\n', 'user_lists NONE',
        'xuser_lists NONE', 'start_proc_args /bin/true',
        'stop_proc_args /bin/true', 'allocation_rule $pe_slots',
        'control_slaves FALSE', 'job_is_first_task TRUE',
        'urgency_slots min', 'accounting_summary FALSE'
    ]

    conn.reset_mock()
    conn.execute.side_effect = [['/home/test'], qconf_output, qsub_output]

    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    self.assertEqual(
        conn.execute.call_args_list[1], mock.call('qconf -sp mype'),
        'Unexpected qconf command: %s'
        % str(conn.execute.call_args_list[0]))

    # For traditional clusters we shouldn't try to extract slot from orte
    cluster = {
        '_id': 'dummy',
        'type': 'trad',
        'name': 'dummy',
        'config': {
            'host': 'dummy',
            'ssh': {
                'user': '******',
                'passphrase': 'its a secret'
            },
            'scheduler': {
                'type': 'sge'
            }
        }
    }

    job_id = 'dummy'
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{
            'tail': True,
            'path': 'dummy/file/path'
        }]
    }

    conn.reset_mock()
    # No qconf entry: only home-dir lookup and qsub are expected.
    conn.execute.side_effect = [[
        '/home/test'
    ], ['Your job 74 ("test.sh") has been submitted']]

    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    # Assert that we don't try and get the number of slots
    self.assertFalse('qconf' in str(conn.execute.call_args_list),
                     'qconf should not be called')

    # For traditional clusters define a parallel env
    cluster = {
        '_id': 'dummy',
        'type': 'trad',
        'name': 'dummy',
        'config': {
            'host': 'dummy',
            'ssh': {
                'user': '******',
                'passphrase': 'its a secret'
            },
            'scheduler': {
                'type': 'sge'
            }
        }
    }
    job_id = 'dummy'
    job_model = {
        '_id': job_id,
        'queueJobId': '1',
        'name': 'dummy',
        'commands': ['ls'],
        'output': [{
            'tail': True,
            'path': 'dummy/file/path'
        }],
        'params': {
            'parallelEnvironment': 'mype'
        }
    }

    conn.reset_mock()
    conn.execute.side_effect = [[
        '/home/test'
    ], qconf_output, ['Your job 74 ("test.sh") has been submitted']]

    with httmock.HTTMock(get_status, set_status, log):
        job.submit_job(cluster, job_model,
                       log_write_url='log_write_url',
                       girder_token='girder_token')

    self.assertEqual(conn.execute.call_args_list[1],
                     mock.call('qconf -sp mype'))
    # The slot count advertised by qconf ends up on the job model.
    self.assertEqual(job_model['params']['numberOfSlots'], 10)
def upload_output(task, _, cluster, job, *args, **kwargs):
    """Upload the PyFR results from the cluster into the output folder,
    record the first vtu file for the viz step and launch the VTK export
    step (as batch jobs by default, or in-taskflow when
    ``exportInTaskFlow`` is set).

    Minor cleanups: ``not 'x' in kwargs`` idiom replaced by
    ``kwargs.get(...)``, truthiness test for the empty list and a
    redundant list copy removed.
    """
    task.taskflow.logger.info('Uploading results from cluster')

    output_folder_id = kwargs['output']['folder']['id']
    client = _create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if not solution_files:
        raise Exception('No solution files where produced, please check output files for errors.')

    # Generate and save the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if not kwargs.get('exportInTaskFlow'):
        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)
        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
        for chunk in [solution_files[i::number_of_jobs]
                      for i in xrange(number_of_jobs)]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(
                    task, name, chunk, sim_job_dir, mesh_filename)
                submit_job(cluster, export_job, log_write_url=None,
                           girder_token=task.taskflow.girder_token,
                           monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', jobs + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(
                output_folder_id, mesh_file_id, solution_files)
        # Break into chunks a run in parallel
        else:
            for chunk in [solution_files[i::NUMBER__OF_EXPORT_TASKS]
                          for i in xrange(NUMBER__OF_EXPORT_TASKS)]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)