def upload_output(task, cluster, job, *args, **kwargs):
    # Girder client
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    # Register generated files into Girder
    task.taskflow.logger.info('Uploading results from cluster')
    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        task.taskflow.logger.info('Exporting to folder %s' % output_folder_id)
        job['output'] = [{'folderId': output_folder_id, 'path': '.'}]
    else:
        task.taskflow.logger.info('No output.folder.id')

    # Upload files and metadata
    upload_job_output_to_folder(cluster, job, log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    # Done...
    task.taskflow.logger.info('Upload complete.')

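# Hedged illustration (not part of the original taskflows): the
# parse('output.folder.id').find(kwargs) pattern above matches the
# jsonpath-rw API, where find() returns a list of match objects and each
# match exposes the resolved value via its .value attribute. The helper name
# and sample data below are hypothetical.
def _example_output_folder_lookup(kwargs):
    from jsonpath_rw import parse  # assumed import; the snippets above do not show it

    matches = parse('output.folder.id').find(kwargs)
    # find() returns an empty list when the path is absent, hence the
    # truthiness check used by upload_output above.
    return matches[0].value if matches else None

# _example_output_folder_lookup({'output': {'folder': {'id': 'abc123'}}}) -> 'abc123'
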
def postprocess_job(task, _, input_, cluster, image, run_parameters,
                    root_folder, container_description, input_folder,
                    output_folder, scratch_folder, job):
    task.taskflow.logger.info('Processing the results of the job.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster, job,
                                girder_token=task.taskflow.girder_token)

    # Remove the temporary input folder; this data is attached to the
    # calculation model
    client.delete('folder/%s' % input_folder['_id'])

    # Clean up the scratch folder
    keep_scratch = run_parameters.get('keepScratch', False)
    if keep_scratch:
        scratch_folder_id = scratch_folder['_id']
    else:
        client.delete('folder/%s' % scratch_folder['_id'])
        scratch_folder_id = None

    # Ingest the output of the calculation
    output_format = container_description['output']['format']
    output_file = None
    output_items = list(client.listItem(output_folder['_id']))
    for item in output_items:
        if item['name'] == 'output.%s' % output_format:
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s'
                    % len(files))
            output_file = files[0]
            break

    if output_file is None:
        raise Exception('The calculation did not produce any output file.')

    # Now call the endpoint to ingest the result
    body = {
        'fileId': output_file['_id'],
        'format': output_format,
        'public': True,
        # image now also has a digest field, add it to the calculation
        'image': image,
        'scratchFolderId': scratch_folder_id
    }
    client.put('calculations/%s' % input_['calculation']['_id'], json=body)

def postprocess_job(task, _, user, cluster, image, job, folder, container):
    task.taskflow.logger.info('Finished pulling the container')
    task.taskflow.logger.info('Processing the results of the pull.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster, job,
                                girder_token=task.taskflow.girder_token)

    output_items = list(client.listItem(folder['_id']))
    output_file = None
    for item in output_items:
        if item['name'] == OUTPUT_FILE:
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                log_and_raise(
                    task,
                    'Expecting a single file under item, found: %s'
                    % len(files))
            output_file = files[0]
            break

    if output_file is None:
        log_and_raise(task, 'Could not locate output file: %s' % OUTPUT_FILE)

    # Download the output file
    output_io = io.BytesIO()
    client.downloadFile(output_file['_id'], output_io)

    # Remove the folder
    client.delete('folder/%s' % folder['_id'])

    pull_json = json.loads(output_io.getvalue().decode('utf-8'))
    image_uri = pull_json.get('imageUri')
    # Convert size to GB
    size = round(pull_json.get('size', 0) / 1.e9, 2)

    _ensure_image_uri_is_valid(task, container, image_uri)

    repository = image.get('repository')
    tag = image.get('tag')
    digest = _extract_digest(container, image_uri)

    post_image_to_database(client, container, repository, tag, digest,
                           cluster, size)

    task.taskflow.logger.info('Success!')

def upload_output(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    # Refresh state of job
    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)
    job = client.get('jobs/%s' % job['_id'])

    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

    upload_job_output_to_folder(cluster, job, log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload complete.')

def upload_output(task, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    # Refresh state of job
    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)
    job = client.get('jobs/%s' % job['_id'])

    output_folder_id = parse('output.folder.id').find(kwargs)
    if output_folder_id:
        output_folder_id = output_folder_id[0].value
        job['output'] = [{'folderId': output_folder_id, 'path': '.'}]

    upload_job_output_to_folder(cluster, job, log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload complete.')

def upload_export_output(task, _, cluster, job, *args, **kwargs):
    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    for job_id in task.taskflow.get_metadata('export_jobs')['export_jobs']:
        # Get job
        export_job = client.get('jobs/%s' % job_id)
        export_job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

        upload_job_output_to_folder(cluster, export_job, log_write_url=None,
                                    job_dir=None,
                                    girder_token=task.taskflow.girder_token)

    # Upload the vtu files
    girder_token = task.taskflow.girder_token
    download_path_from_cluster(cluster, girder_token, output_folder_id,
                               job['dir'], include=['^.*\\.vtu$'])

def upload_export_output(task, _, cluster, job, *args, **kwargs):
    output_folder_id = kwargs['output']['folder']['id']

    client = _create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    for job_id in task.taskflow.get_metadata('export_jobs')['export_jobs']:
        # Get job
        export_job = client.get('jobs/%s' % job_id)
        export_job['output'] = [{
            'folderId': output_folder_id,
            'path': '.'
        }]

        upload_job_output_to_folder(cluster, export_job, log_write_url=None,
                                    job_dir=None,
                                    girder_token=task.taskflow.girder_token)

    # Upload the vtu files
    girder_token = task.taskflow.girder_token
    download_path_from_cluster(cluster, girder_token, output_folder_id,
                               job['dir'], include=['^.*\\.vtu$'])

def upload_output(task, upstream_result):
    task.taskflow.logger.info('Uploading results from cluster')

    output_folder_id = upstream_result['output']['folder']['id']
    cluster = upstream_result['cluster']
    job = upstream_result['job']

    client = create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    return upstream_result

def postprocess(task, _, run_folder, input_, cluster, job):
    task.taskflow.logger.info('Uploading results from cluster')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)
    output_folder = client.createFolder(run_folder['_id'], 'output')

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{'folderId': output_folder['_id'], 'path': '.'}]

    upload_job_output_to_folder(cluster, job,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    input_file_name = task.taskflow.get_metadata('inputFileName')
    input_file_name

    # Call to ingest the files
    for item in client.listItem(output_folder['_id']):
        if item['name'].endswith('.json'):
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s'
                    % len(files))
            json_output_file_id = files[0]['_id']

            # Now call endpoint to ingest result
            body = {
                'calculationId': input_['calculation']['_id'],
                'fileId': json_output_file_id,
                'public': True
            }
            client.post('molecules', json=body)

def postprocess_description(task, _, input_, cluster, image, run_parameters,
                            root_folder, description_job, description_folder):
    task.taskflow.logger.info('Processing description job output.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    description_job = client.get('jobs/%s' % description_job['_id'])

    upload_job_output_to_folder(cluster, description_job,
                                girder_token=task.taskflow.girder_token)

    description_items = list(client.listItem(description_folder['_id']))

    description_file = None
    pull_file = None
    for item in description_items:
        if item['name'] == 'description.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s'
                    % len(files))
            description_file = files[0]
        elif item['name'] == 'pull.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                raise Exception(
                    'Expecting a single file under item, found: %s'
                    % len(files))
            pull_file = files[0]

    if pull_file is None:
        raise Exception(
            'There was an error trying to pull the requested container image')

    if description_file is None:
        raise Exception(
            'The container does not correctly implement the --description '
            'flag')

    with tempfile.TemporaryFile() as tf:
        client.downloadFile(pull_file['_id'], tf)
        tf.seek(0)
        container_pull = json.loads(tf.read().decode())

    image = container_pull

    with tempfile.TemporaryFile() as tf:
        client.downloadFile(description_file['_id'], tf)
        tf.seek(0)
        container_description = json.loads(tf.read().decode())

    # Remove the temporary description folder
    client.delete('folder/%s' % description_folder['_id'])

    setup_input.delay(input_, cluster, image, run_parameters, root_folder,
                      container_description)

def upload_output(task, _, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    output_folder_id = kwargs['output']['folder']['id']

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{'folderId': output_folder_id, 'path': '.'}]

    upload_job_output_to_folder(cluster, job, log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if len(solution_files) == 0:
        raise Exception(
            'No solution files were produced, please check output files '
            'for errors.')

    # Generate and save the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    if len(file_names) > 1:
        vtu_file_first = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
        # find where to put the ...*...
        head = tail = size = len(vtu_file_first)
        for fileName in file_names[1:]:
            vtu_name = '%s.vtu' % fileName.rsplit('.', 1)[0]
            for i in range(size):
                if vtu_file_first[i] != vtu_name[i] and head > i:
                    head = i  # not included in the cut
                if vtu_file_first[-i] != vtu_name[-i] and tail >= i:
                    tail = i - 1
        vtu_file = '%s*%s' % (vtu_file_first[:head], vtu_file_first[-tail:])
    else:
        vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if 'exportInTaskFlow' not in kwargs or not kwargs['exportInTaskFlow']:
        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)

        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
        for chunk in [solution_files[i::number_of_jobs]
                      for i in xrange(number_of_jobs)]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(task, name, chunk, sim_job_dir,
                                               mesh_filename)
                submit_job(cluster, export_job, log_write_url=None,
                           girder_token=task.taskflow.girder_token,
                           monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', [j for j in jobs] + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(output_folder_id, mesh_file_id, solution_files)
        # Break into chunks and run in parallel
        else:
            for chunk in [solution_files[i::NUMBER__OF_EXPORT_TASKS]
                          for i in xrange(NUMBER__OF_EXPORT_TASKS)]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)

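# Hedged illustration (not part of the original taskflow): the head/tail loop
# above derives a single 'prefix*suffix' wildcard that covers all of the
# exported .vtu names, so a viz step can reference the whole series with one
# pattern. The standalone helper below, with a hypothetical name and sample
# file names, reproduces that derivation.
def _example_vtu_wildcard(file_names):
    file_names = sorted(file_names)
    first = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    head = tail = size = len(first)
    for name in file_names[1:]:
        vtu_name = '%s.vtu' % name.rsplit('.', 1)[0]
        for i in range(size):
            if first[i] != vtu_name[i] and head > i:
                head = i
            if first[-i] != vtu_name[-i] and tail >= i:
                tail = i - 1
    return '%s*%s' % (first[:head], first[-tail:])

# _example_vtu_wildcard(['flow_001.pyfrs', 'flow_002.pyfrs', 'flow_010.pyfrs'])
# returns 'flow_0*.vtu'
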
def postprocess_job(task, _, input_, cluster, image, run_parameters,
                    root_folder, container_description, input_folder,
                    output_folder, scratch_folder, run_folder, job):
    task.taskflow.logger.info('Processing the results of the calculation.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])

    upload_job_output_to_folder(cluster, job,
                                girder_token=task.taskflow.girder_token)

    # Remove the temporary input folder; this data is attached to the
    # calculation model
    client.delete('folder/%s' % input_folder['_id'])

    # Clean up the scratch folder
    keep_scratch = run_parameters.get('keepScratch', False)
    if keep_scratch:
        scratch_folder_id = scratch_folder['_id']
    else:
        client.delete('folder/%s' % scratch_folder['_id'])
        scratch_folder_id = None

    # Ingest the output of the calculation
    output_format = container_description['output']['format']

    output_files = []
    output_items = list(client.listItem(output_folder['_id']))
    for i in range(len(input_['calculations'])):
        output_file = None
        for item in output_items:
            if item['name'] == 'output_%d.%s' % (i + 1, output_format):
                files = list(client.listFile(item['_id']))
                if len(files) != 1:
                    log_std_err(task, client, run_folder)
                    log_and_raise(
                        task,
                        'Expecting a single file under item, found: %s'
                        % len(files))
                output_file = files[0]
                break

        if output_file is None:
            # Log the job stderr
            log_std_err(task, client, run_folder)
            log_and_raise(task,
                          'The calculation did not produce any output file.')

        output_files.append(output_file)

    # Remove the run folder; it is only useful for accessing stdout and stderr
    # after the job is done
    client.delete('folder/%s' % run_folder['_id'])

    # Now call endpoint to ingest result
    params = {'detectBonds': True}

    task.taskflow.logger.info(
        'Uploading the results of the calculation to the database.')

    code = task.taskflow.get_metadata('code')
    if isinstance(code, dict):
        # Get the contents of "code" to set it below
        code = code.get('code')

    for i, output_file in enumerate(output_files):
        body = {
            'fileId': output_file['_id'],
            'format': output_format,
            'public': True,
            # image now also has a digest field, add it to the calculation
            'image': image,
            'scratchFolderId': scratch_folder_id,
            'code': code
        }
        client.put('calculations/%s' % input_['calculations'][i],
                   parameters=params, json=body)

    task.taskflow.logger.log(STATUS_LEVEL, 'Done!')

def postprocess_description(task, _, input_, user, cluster, image,
                            run_parameters, root_folder, description_job,
                            description_folder):
    task.taskflow.logger.info(
        'Processing the output of the container description job.')

    client = create_girder_client(task.taskflow.girder_api_url,
                                  task.taskflow.girder_token)

    # Refresh state of job
    description_job = client.get('jobs/%s' % description_job['_id'])

    upload_job_output_to_folder(cluster, description_job,
                                girder_token=task.taskflow.girder_token)

    description_items = list(client.listItem(description_folder['_id']))

    description_file = None
    for item in description_items:
        if item['name'] == 'description.json':
            files = list(client.listFile(item['_id']))
            if len(files) != 1:
                log_std_err(task, client, description_folder)
                log_and_raise(
                    task,
                    'Expecting a single file under item, found: %s'
                    % len(files))
            description_file = files[0]

    if description_file is None:
        log_std_err(task, client, description_folder)
        log_and_raise(
            task,
            'The container does not correctly implement the --description '
            'flag')

    with client.session() as session:
        # If we have a NEWT session id we need to set it as a cookie so the
        # redirect to the NEWT API works (is authenticated).
        newt_session_id = parse('newt.sessionId').find(user)
        if newt_session_id:
            newt_session_id = newt_session_id[0].value
            session.cookies.set('newt_sessionid', newt_session_id)

        with tempfile.TemporaryFile() as tf:
            client.downloadFile(description_file['_id'], tf)
            tf.seek(0)
            container_description = json.loads(tf.read().decode())

    # Add code name and version to the taskflow metadata
    code = {
        'name': container_description.get('name'),
        'version': container_description.get('version')
    }
    task.taskflow.set_metadata('code', code)

    # Remove the temporary description folder
    client.delete('folder/%s' % description_folder['_id'])

    setup_input.delay(input_, cluster, image, run_parameters, root_folder,
                      container_description)

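# Hedged sketch of the description.json payload implied by the functions
# above; the field names are the ones these snippets actually read
# (name, version and output.format), while the values shown are hypothetical:
#
# {
#     "name": "mycode",
#     "version": "1.0",
#     "output": {"format": "cjson"}
# }
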
def upload_output(task, _, cluster, job, *args, **kwargs):
    task.taskflow.logger.info('Uploading results from cluster')

    output_folder_id = kwargs['output']['folder']['id']

    client = _create_girder_client(
        task.taskflow.girder_api_url, task.taskflow.girder_token)

    # Refresh state of job
    job = client.get('jobs/%s' % job['_id'])
    job['output'] = [{
        'folderId': output_folder_id,
        'path': '.'
    }]

    upload_job_output_to_folder(cluster, job, log_write_url=None,
                                job_dir=None,
                                girder_token=task.taskflow.girder_token)

    task.taskflow.logger.info('Upload job output complete.')

    mesh_file_id = kwargs.pop('meshFileId')

    solution_files = list(_list_solution_files(client, output_folder_id))

    if len(solution_files) == 0:
        raise Exception(
            'No solution files were produced, please check output files '
            'for errors.')

    # Generate and save the first vtu file that should be loaded for this
    # run. This can then be used to know which file to open as part of any viz
    # step.
    file_names = [f['name'] for f in solution_files]
    file_names.sort()
    vtu_file = '%s.vtu' % file_names[0].rsplit('.', 1)[0]
    task.taskflow.set_metadata('vtuFile', vtu_file)

    number_files = len(solution_files)

    # By default export solution files to VTK format using a set of batch jobs
    if 'exportInTaskFlow' not in kwargs or not kwargs['exportInTaskFlow']:
        number_of_jobs = kwargs['numberOfProcs']
        task.logger.info('Generating %d export jobs' % number_of_jobs)

        sim_job_dir = job['dir']
        jobs = []
        job_index = 1
        for chunk in [solution_files[i::number_of_jobs]
                      for i in xrange(number_of_jobs)]:
            if chunk:
                name = 'pyfr_export_%d' % job_index
                mesh_filename = kwargs['meshFilename']
                export_job = create_export_job(
                    task, name, chunk, sim_job_dir, mesh_filename)
                submit_job(cluster, export_job, log_write_url=None,
                           girder_token=task.taskflow.girder_token,
                           monitor=False)
                jobs.append(export_job)
                job_index += 1

        # Update the jobs list in the metadata
        task.taskflow.set_metadata('jobs', [j for j in jobs] + [job])
        # Also save just the export job ids
        task.taskflow.set_metadata('export_jobs', [j['_id'] for j in jobs])

        monitor_jobs.apply_async(
            (cluster, jobs), {'girder_token': task.taskflow.girder_token},
            link=upload_export_output.s(cluster, job, *args, **kwargs))
    else:
        # The number 100 is pretty arbitrary!
        if number_files < 100:
            export_output.delay(
                output_folder_id, mesh_file_id, solution_files)
        # Break into chunks and run in parallel
        else:
            for chunk in [solution_files[i::NUMBER__OF_EXPORT_TASKS]
                          for i in xrange(NUMBER__OF_EXPORT_TASKS)]:
                export_output.delay(output_folder_id, mesh_file_id, chunk)

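# Hedged illustration (not part of the original code): the extended-slice
# expression solution_files[i::number_of_jobs] used above splits the file
# list round-robin into number_of_jobs chunks, e.g.
#
#   files = ['a', 'b', 'c', 'd', 'e']
#   [files[i::2] for i in range(2)]  ->  [['a', 'c', 'e'], ['b', 'd']]
#
# so each export job (or export_output task) receives a roughly equal share
# of the solution files.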