def create_and_post_processed_file(ff_keys, file_format, secondary_file_formats,
                                   source_experiments=None, other_fields=None):
    printlog(file_format)
    if not file_format:
        raise Exception("file format for processed file must be provided")
    if secondary_file_formats:
        extra_files = [{"file_format": parse_formatstr(v)} for v in secondary_file_formats]
    else:
        extra_files = None
    pf = ProcessedFileMetadata(file_format=file_format,
                               extra_files=extra_files,
                               source_experiments=source_experiments,
                               other_fields=other_fields)
    # actually post processed file metadata here
    resp = pf.post(key=ff_keys)
    if resp and '@graph' in resp:
        resp = resp.get('@graph')[0]
    else:
        raise Exception("Failed to post Processed file metadata.\n")
    return pf, resp
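# A minimal usage sketch, assuming ff_keys holds valid portal access keys
# (the format names below are hypothetical examples, not values from this repo):
#
#     pf, resp = create_and_post_processed_file(ff_keys,
#                                               file_format='pairs',
#                                               secondary_file_formats=['pairs_px2'])
#     # pf is the local ProcessedFileMetadata object; resp is the posted portal item,
#     # carrying server-assigned fields such as uuid and upload_key.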
def test_create_ProcessedFileMetadata_from_get_error_if_no_at_type(
        ff_keys, proc_file_in_webdev):
    # can use acc, uuid, @id, any valid url
    with mock.patch('core.pony_utils.get_metadata', return_value=proc_file_in_webdev):
        with pytest.raises(Exception) as expinfo:
            ProcessedFileMetadata.get(proc_file_in_webdev['accession'], ff_keys)
        assert "only load ProcessedFiles" in str(expinfo.value)
def test_add_md5_filesize_to_pf_extra():
    wff = AwsemFile(bucket='somebucket', key='somekey.pairs.gz.px2',
                    runner=None, md5='somemd5', filesize=1234,
                    argument_type='Output processed file',
                    format_if_extra='pairs_px2')
    pf = ProcessedFileMetadata(extra_files=[{'file_format': 'lalala'},
                                            {'file_format': 'pairs_px2'}])
    add_md5_filesize_to_pf_extra(pf, wff)
    assert 'md5sum' in pf.extra_files[1]
    assert 'file_size' in pf.extra_files[1]
    assert pf.extra_files[1]['md5sum'] == 'somemd5'
    assert pf.extra_files[1]['file_size'] == 1234
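# For reference, a plausible sketch of what add_md5_filesize_to_pf_extra does,
# inferred only from the test above (the real implementation may differ):
# find the extra_files entry whose file_format matches the AwsemFile's
# format_if_extra, then copy over the md5 and file size.
#
#     def add_md5_filesize_to_pf_extra(pf, wff):
#         for extra in pf.extra_files:
#             if extra['file_format'] == wff.format_if_extra:
#                 extra['md5sum'] = wff.md5
#                 extra['file_size'] = wff.filesize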
def test_create_ProcessedFileMetadata_from_get(ff_keys, proc_file_in_webdev):
    # can use acc, uuid, @id, any valid url
    file_with_type = proc_file_in_webdev.copy()
    file_with_type['@type'] = ['FileProcessed', 'Item', 'whatever']
    with mock.patch('core.pony_utils.get_metadata', return_value=file_with_type) as ff:
        pf = ProcessedFileMetadata.get(proc_file_in_webdev['accession'], ff_keys)
        assert pf.__dict__ == proc_file_in_webdev
        assert type(pf) is ProcessedFileMetadata
        # assert_called_once is the real mock API; was_called_once is not a
        # Mock method and would silently pass without checking anything
        ff.assert_called_once()
def user_supplied_proc_file(user_supplied_output_files, arg_name, tibanna):
    if not user_supplied_output_files:
        raise Exception("user supplied processed files missing\n")
    of = [output for output in user_supplied_output_files
          if output.get('workflow_argument_name') == arg_name]
    if of:
        if len(of) > 1:
            raise Exception("multiple output files supplied with same workflow_argument_name")
        of = of[0]
        return ProcessedFileMetadata.get(of.get('uuid'), tibanna.ff_keys,
                                         tibanna.env, return_data=True)
    else:
        printlog("no output_files found in input_json matching arg_name")
        printlog("user_supplied_output_files: %s" % str(user_supplied_output_files))
        printlog("arg_name: %s" % str(arg_name))
        printlog("tibanna is %s" % str(tibanna))
        raise Exception("user supplied processed files missing\n")
def proc_file_for_arg_name(output_files, arg_name, tibanna):
    if not output_files:
        LOG.info("proc_file_for_arg_name no output_files specified")
        return None, None
    of = [output for output in output_files
          if output.get('workflow_argument_name') == arg_name]
    if of:
        if len(of) > 1:
            raise Exception("multiple output files supplied with same workflow_argument_name")
        of = of[0]
        return ProcessedFileMetadata.get(of.get('uuid'), tibanna.ff_keys,
                                         tibanna.env, return_data=True)
    else:
        LOG.info("no output_files found in input_json matching arg_name")
        LOG.info("output_files: %s" % str(output_files))
        LOG.info("arg_name: %s" % str(arg_name))
        LOG.info("tibanna is %s" % str(tibanna))
        return None, None
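# A minimal sketch of how proc_file_for_arg_name is typically called
# (the output_files payload below is a hypothetical example; real entries
# come from the user-supplied output_files in the input json):
#
#     output_files = [{'workflow_argument_name': 'out_pairs',
#                      'uuid': 'some-file-uuid'}]
#     pf, resp = proc_file_for_arg_name(output_files, 'out_pairs', tibanna)
#     if pf:
#         pass  # reuse the user-supplied ProcessedFileMetadata
#     else:
#         pass  # no match; caller falls through and posts a new one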
def real_handler(event, context):
    # check the status and other details of import
    '''
    this is to check if the task run is done:
    http://docs.sevenbridges.com/reference#get-task-execution-details
    '''
    # get data
    # used to automatically determine the environment
    tibanna_settings = event.get('_tibanna', {})
    try:
        tibanna = Tibanna(tibanna_settings['env'], settings=tibanna_settings)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    ff_meta = create_ffmeta_awsem(app_name=event.get('ff_meta').get('awsem_app_name'),
                                  **event.get('ff_meta'))
    if event.get('error', False):
        ff_meta.run_status = 'error'
        ff_meta.description = event.get('error')
        patch_res = ff_meta.patch(key=tibanna.ff_keys)
        printlog("patch response: " + str(patch_res))
        # sending a notification email before throwing error
        if 'email' in event['config'] and event['config']['email']:
            try:
                send_notification_email(event['_tibanna']['settings']['run_name'],
                                        event['jobid'],
                                        ff_meta.run_status,
                                        event['_tibanna']['settings']['url'])
            except Exception as e:
                printlog("Cannot send email: %s" % e)
        raise Exception(event.get('error'))
    metadata_only = event.get('metadata_only', False)
    pf_meta = [ProcessedFileMetadata(**pf) for pf in event.get('pf_meta')]
    custom_qc_fields = event.get('custom_qc_fields', None)
    # ensure this bad boy is always initialized
    awsem = Awsem(event)
    # go through this and replace awsemfile_report with awsf format
    # actually interface should be look through ff_meta files and call
    # give me the status of this thing from the runner, and runner.output_files.length
    # so we just build a runner with interface to sbg and awsem
    # runner.output_files.length()
    # runner.output_files.file.status
    # runner.output_files.file.loc
    # runner.output_files.file.get
    awsem_output = awsem.output_files()
    awsem_output_extra = awsem.secondary_output_files()
    ff_output = len(ff_meta.output_files)
    if len(awsem_output) != ff_output:
        ff_meta.run_status = 'error'
        ff_meta.description = "%d output files expected but %d found" \
                              % (ff_output, len(awsem_output))
        ff_meta.patch(key=tibanna.ff_keys)
        raise Exception("Failing the workflow because output files = %d and ffmeta = %d"
                        % (len(awsem_output), ff_output))

    def update_metadata_from_awsemfile_list(awsemfile_list):
        patch_meta = False
        for awsemfile in awsemfile_list:
            patch_meta = update_ffmeta_from_awsemfile(awsemfile, ff_meta, tibanna,
                                                      custom_qc_fields)
            if not metadata_only:
                update_pfmeta_from_awsemfile(awsemfile, pf_meta, tibanna)
        # allow for a simple way for updater to add appropriate meta_data
        if patch_meta:
            ff_meta.__dict__.update(patch_meta)

    update_metadata_from_awsemfile_list(awsem_output)
    update_metadata_from_awsemfile_list(awsem_output_extra)
    # if we got all the awsemfiles let's go ahead and update our ff_metadata object
    ff_meta.run_status = "complete"
    # add postrunjson log file to ff_meta as a url
    ff_meta.awsem_postrun_json = get_postrunjson_url(event)
    # make all the file awsemfile meta-data stuff here
    # TODO: fix bugs with ff_meta mapping for output and input file
    try:
        ff_meta.patch(key=tibanna.ff_keys)
    except Exception as e:
        raise Exception("Failed to update run_status %s" % str(e))
    # patch processed files - update only status, extra_files, md5sum and file_size
    if pf_meta:
        patch_fields = ['uuid', 'status', 'extra_files', 'md5sum', 'file_size', 'higlass_uid']
        try:
            for pf in pf_meta:
                printlog(pf.as_dict())
                pf.patch(key=tibanna.ff_keys, fields=patch_fields)
        except Exception as e:
            raise Exception("Failed to update processed metadata %s" % str(e))
    event['ff_meta'] = ff_meta.as_dict()
    event['pf_meta'] = [_.as_dict() for _ in pf_meta]
    # sending a notification email after the job finishes
    if 'email' in event['config'] and event['config']['email']:
        try:
            send_notification_email(event['_tibanna']['settings']['run_name'],
                                    event['jobid'],
                                    event['ff_meta']['run_status'],
                                    event['_tibanna']['settings']['url'])
        except Exception as e:
            printlog("Cannot send email: %s" % e)
    return event
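# A rough sketch of the event shape real_handler expects (all field values
# below are hypothetical placeholders; the exact schema is defined by the
# upstream step function that invokes this lambda):
#
#     event = {
#         '_tibanna': {'env': 'fourfront-webdev',
#                      'settings': {'run_name': 'my_run', 'url': 'https://...'}},
#         'jobid': 'somejobid',
#         'config': {'email': False},
#         'ff_meta': {'awsem_app_name': 'some_app'},  # workflow run metadata kwargs
#         'pf_meta': [{'file_format': 'pairs'}],      # ProcessedFileMetadata kwargs
#         'metadata_only': False,
#     }
#     event = real_handler(event, None)  # returns the event with updated metadata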
def handle_processed_files(workflow_info, tibanna, pf_source_experiments=None,
                           custom_fields=None, user_supplied_output_files=None):
    output_files = []
    pf_meta = []
    fe_map = None
    try:
        print("Inside handle_processed_files")
        LOG.info("Inside handle_processed_files")
        for arg in workflow_info.get('arguments', []):
            print("processing arguments %s" % str(arg))
            LOG.info("processing arguments %s" % str(arg))
            if (arg.get('argument_type') in ['Output processed file',
                                             'Output report file',
                                             'Output QC file']):
                of = dict()
                argname = of['workflow_argument_name'] = arg.get('workflow_argument_name')
                of['type'] = arg.get('argument_type')
                # see if user supplied the output file already
                # this is often the case for pseudo workflow runs (run externally)
                # TODO move this down next to post of pf
                pf, resp = proc_file_for_arg_name(user_supplied_output_files,
                                                  arg.get('workflow_argument_name'),
                                                  tibanna)
                if pf:
                    print("proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                          % (str(pf.__dict__), str(resp)))
                    LOG.info("proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                             % (str(pf.__dict__), str(resp)))
                    pf_meta.append(pf)
                else:
                    print("proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                          % (str(pf), str(resp)))
                    LOG.info("proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                             % (str(pf), str(resp)))
                if not resp:  # if it wasn't supplied as output we have to create a new one
                    assert user_supplied_output_files is None
                    if of['type'] == 'Output processed file':
                        print("creating new processedfile")
                        LOG.info("creating new processedfile")
                        if 'argument_format' not in arg:
                            raise Exception("argument format for processed file must be provided")
                        if not fe_map:
                            fe_map = get_format_extension_map(tibanna.ff_keys)
                        # These are not processed files but report or QC files.
                        of['format'] = arg.get('argument_format')
                        of['extension'] = fe_map.get(arg.get('argument_format'))
                        if 'secondary_file_formats' in arg:
                            of['secondary_file_formats'] = arg.get('secondary_file_formats')
                            of['secondary_file_extensions'] = [fe_map.get(v)
                                                               for v in arg.get('secondary_file_formats')]
                            extra_files = [{"file_format": v}
                                           for v in of['secondary_file_formats']]
                        else:
                            extra_files = None
                        pf_other_fields = dict()
                        if custom_fields:
                            if argname in custom_fields:
                                pf_other_fields.update(custom_fields[argname])
                            if 'ALL' in custom_fields:
                                pf_other_fields.update(custom_fields['ALL'])
                        pf = ProcessedFileMetadata(file_format=arg.get('argument_format'),
                                                   extra_files=extra_files,
                                                   source_experiments=pf_source_experiments,
                                                   other_fields=pf_other_fields)
                        try:
                            # actually post processed file metadata here
                            resp = pf.post(key=tibanna.ff_keys)
                            resp = resp.get('@graph')[0]
                        except Exception as e:
                            LOG.error("Failed to post Processed file metadata. %s\n" % e)
                            LOG.error("resp" + str(resp) + "\n")
                            raise e
                        pf_meta.append(pf)
                if resp:
                    of['upload_key'] = resp.get('upload_key')
                    of['value'] = resp.get('uuid')
                    of['extra_files'] = resp.get('extra_files')
                output_files.append(of)
    except Exception as e:
        LOG.error("output_files = " + str(output_files) + "\n")
        LOG.error("Can't prepare output_files information. %s\n" % e)
        raise e
    return output_files, pf_meta
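# A minimal sketch of how handle_processed_files is driven (the workflow_info
# payload below is a hypothetical example; real values come from the workflow
# metadata on the portal and the Tibanna settings object):
#
#     workflow_info = {'arguments': [
#         {'workflow_argument_name': 'out_pairs',
#          'argument_type': 'Output processed file',
#          'argument_format': 'pairs',
#          'secondary_file_formats': ['pairs_px2']},
#     ]}
#     output_files, pf_meta = handle_processed_files(
#         workflow_info, tibanna,
#         pf_source_experiments=['some-experiment-uuid'])
#     # output_files feeds the run's output_files section; pf_meta holds the
#     # ProcessedFileMetadata objects that get patched after the run finishes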