def mount_on_sbg(input_file, s3_keys, sbg):
    # get important info from input_file json
    bucket = input_file.get('bucket_name')
    keys = ensure_list(input_file.get('object_key'))
    key_uuids = ensure_list(input_file.get('uuid'))
    import_ids = []
    for key, key_uuid in zip(keys, key_uuids):
        imp_id = mount_one_on_sbg(key, key_uuid, bucket, s3_keys, sbg)
        import_ids.append(imp_id)
    return import_ids
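# For orientation: a hypothetical input_file entry for mount_on_sbg, inferred from the
# keys it reads ('bucket_name', 'object_key', 'uuid'). All values below are invented,
# and mount_one_on_sbg is assumed to return one SBG import id per (key, uuid) pair.
example_input_file = {
    'bucket_name': 'some-s3-bucket',                              # assumed bucket name
    'object_key': ['sample_R1.fastq.gz', 'sample_R2.fastq.gz'],   # a single key or a list
    'uuid': ['uuid-aaa', 'uuid-bbb'],                             # uuids matching the keys
}
# import_ids = mount_on_sbg(example_input_file, s3_keys, sbg)
# -> e.g. ['import-id-1', 'import-id-2'], in the same order as the keys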
def add_inputfile(self, upload_key, file_id, argument_name, is_list=False):
    '''
    Create the appropriate input for an SBG task for the specified file.
    Some inputs should be file arrays, i.e. a list of files attached to a single
    input argument_name. If is_list is True, create those as a list; otherwise
    create the "standard" single-file input.
    '''
    if is_list:
        input_list = ensure_list(self.inputs.get(argument_name, []))
        input_list.append({"class": "File", "name": upload_key, "path": file_id})
        new_input = {argument_name: input_list}
    else:
        new_input = {argument_name: {"class": "File", "name": upload_key, "path": file_id}}
    self.add_input(new_input)
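# For illustration: the input structures add_inputfile builds. The argument names and
# file identifiers below are hypothetical, not part of the original code.
single_input = {'input_bam': {'class': 'File', 'name': 'sample.bam', 'path': 'file-id-1'}}  # is_list=False
array_input = {'input_fastqs': [                                                            # is_list=True, after two calls
    {'class': 'File', 'name': 'sample_R1.fastq.gz', 'path': 'file-id-2'},
    {'class': 'File', 'name': 'sample_R2.fastq.gz', 'path': 'file-id-3'},
]}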
def export_alembic(source_node, target_path, start_frame, end_frame,
                   bake=False, args=None, motion_blur_samples=None):
    """
    Cache the source_node(s) to the specified path.

    Args:
        source_node: source node(s)
        target_path: output path
        start_frame: start frame of this cache
        end_frame: end frame of this cache
        bake: bake all of the prior animation information onto the node itself
        args: optional dict of extra exporter options
        motion_blur_samples: optional (length, samples, threshold) tuple
    """
    source = ensure_pynode(source_node)
    source = ensure_list(source)
    abc_exporter = ExportAlembic()
    if args:
        for key, value in args.items():
            abc_exporter[key] = value
    if motion_blur_samples:
        abc_exporter.set_motionblur_sample_option(
            length=motion_blur_samples[0],
            samples=motion_blur_samples[1],
            threshold=motion_blur_samples[2])
    abc_exporter.set_framerange(start_frame, end_frame)
    for node in source:
        abc_exporter.add_root(node)
    abc_exporter.export(target_path)
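# A hedged usage sketch for export_alembic. The node name, output path, extra exporter
# option, and motion-blur values are all assumptions for illustration only.
export_alembic(
    source_node='char_body_geo',        # assumed transform node in the scene
    target_path='/tmp/char_body.abc',   # assumed output path
    start_frame=1001,
    end_frame=1100,
    args={'uvWrite': True},             # assumed option accepted by ExportAlembic
    motion_blur_samples=(0.5, 2, 0.1),  # (length, samples, threshold)
)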
def real_handler(event, context):
    '''
    This is a generic function to run an awsem workflow based on the data passed in.

    workflow_uuid : for now, pass this on. Later we can add code to automatically
    retrieve this from app_name. Note multiple workflow_uuids can be available for
    an app_name (different versions of the same app could have a different uuid).
    '''
    # get incoming data
    input_file_list = event.get('input_files')
    for infile in input_file_list:
        if not infile:
            raise Exception("malformed input, check your input_files")
    app_name = event.get('app_name')
    print(app_name)
    workflow_uuid = event.get('workflow_uuid')
    output_bucket = event.get('output_bucket')
    parameters = ff_utils.convert_param(event.get('parameters'), True)
    tibanna_settings = event.get('_tibanna', {})
    tag = event.get('tag')
    # if they don't pass in env, guess it from output_bucket
    try:
        env = tibanna_settings.get('env', '-'.join(output_bucket.split('-')[1:-1]))
        # tibanna provides access to keys based on env and related settings
        tibanna = Tibanna(env, ff_keys=event.get('ff_keys'), settings=tibanna_settings)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    args = dict()

    # get argument format & type info from workflow
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys,
                                          ff_env=tibanna.env, add_on='frame=object')
    print("workflow info %s" % workflow_info)
    LOG.info("workflow info %s" % workflow_info)
    if 'error' in workflow_info.get('@type', []):
        raise Exception("FATAL, can't look up workflow info for %s on fourfront" % workflow_uuid)

    # get cwl info from workflow_info
    for k in ['app_name', 'app_version', 'cwl_directory_url',
              'cwl_main_filename', 'cwl_child_filenames']:
        print(workflow_info.get(k))
        LOG.info(workflow_info.get(k))
        args[k] = workflow_info.get(k)
    if not args['cwl_child_filenames']:
        args['cwl_child_filenames'] = []

    # switch to v1 if available
    if 'cwl_directory_url_v1' in workflow_info:  # use CWL v1
        args['cwl_directory_url'] = workflow_info['cwl_directory_url_v1']
        args['cwl_version'] = 'v1'
    else:
        args['cwl_version'] = 'draft3'

    # create the ff_meta output info
    input_files = []
    for input_file in input_file_list:
        for idx, uuid in enumerate(ensure_list(input_file['uuid'])):
            input_files.append({'workflow_argument_name': input_file['workflow_argument_name'],
                                'value': uuid, 'ordinal': idx + 1})
    print("input_files is %s" % input_files)
    LOG.info("input_files is %s" % input_files)

    # input file args for awsem
    for input_file in input_file_list:
        process_input_file_info(input_file, tibanna.ff_keys, tibanna.env, args)

    # source experiments
    input_file_uuids = [_['uuid'] for _ in input_file_list]
    pf_source_experiments = merge_source_experiments(input_file_uuids,
                                                     tibanna.ff_keys, tibanna.env)

    # processed file metadata
    output_files, pf_meta = handle_processed_files(
        workflow_info, tibanna, pf_source_experiments,
        custom_fields=event.get('custom_pf_fields'),
        user_supplied_output_files=event.get('output_files'))
    print("output files= %s" % str(output_files))

    # 4DN dcic award and lab are used here, unless provided in wfr_meta
    ff_meta = create_ffmeta_awsem(
        workflow_uuid, app_name, input_files, tag=tag,
        run_url=tibanna.settings.get('url', ''),
        output_files=output_files, parameters=parameters,
        extra_meta=event.get('wfr_meta'),
    )
    print("ff_meta is %s" % ff_meta.__dict__)
    LOG.info("ff_meta is %s" % ff_meta.__dict__)

    # store metadata so we know the run has started
    ff_meta.post(key=tibanna.ff_keys)

    # parameters
    args['input_parameters'] = event.get('parameters')

    # output target
    args['output_target'] = dict()
    args['secondary_output_target'] = dict()
    for of in ff_meta.output_files:
        arg_name = of.get('workflow_argument_name')
        if of.get('type') == 'Output processed file':
            args['output_target'][arg_name] = of.get('upload_key')
        else:
            args['output_target'][arg_name] = ff_meta.uuid + '/' + arg_name
        if 'secondary_file_formats' in of:
            # takes only the first secondary file.
            args['secondary_output_target'][arg_name] \
                = [_.get('upload_key') for _ in of.get('extra_files', [{}, ])]

    # output bucket
    args['output_S3_bucket'] = event.get('output_bucket')

    # initialize config parameters as null for benchmarking
    config = event['config']
    if 'instance_type' not in config:
        config['instance_type'] = ''
    if 'EBS_optimized' not in config:
        config['EBS_optimized'] = ''
    if 'ebs_size' not in config:
        config['ebs_size'] = 0

    event.update({"ff_meta": ff_meta.as_dict(),
                  'pf_meta': [meta.as_dict() for meta in pf_meta],
                  "_tibanna": tibanna.as_dict(),
                  "args": args})
    return event
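# A hedged sketch of the event payload real_handler expects, inferred from the keys it
# reads. All values are invented; the exact per-input-file fields depend on
# process_input_file_info, which is not shown here.
example_event = {
    'app_name': 'md5',
    'workflow_uuid': 'made-up-workflow-uuid',
    'output_bucket': 'made-up-output-bucket',
    'parameters': {},
    'input_files': [{
        'workflow_argument_name': 'input_file',
        'uuid': 'made-up-file-uuid',
        'bucket_name': 'made-up-file-bucket',
        'object_key': 'made-up-file.fastq.gz',
    }],
    'config': {},                       # instance_type / EBS_optimized / ebs_size get defaults
    '_tibanna': {'env': 'made-up-env'},
    'tag': '0.1.0',
}
# result = real_handler(example_event, None)
# -> the same event, augmented with 'ff_meta', 'pf_meta', '_tibanna', and 'args'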
def handler(event, context):
    '''
    This is a generic function to run an sbg workflow based on the data passed in.

    workflow_uuid : for now, pass this on. Later we can add code to automatically
    retrieve this from app_name. Note multiple workflow_uuids can be available for
    an app_name (different versions of the same app could have a different uuid).
    '''
    # get incoming data
    input_file_list = event.get('input_files')
    app_name = event.get('app_name')
    parameter_dict = event.get('parameters')
    workflow_uuid = event.get('workflow_uuid')
    output_bucket = event.get('output_bucket')
    tibanna_settings = event.get('_tibanna', {})
    # if they don't pass in env, guess it from output_bucket
    env = tibanna_settings.get('env', '-'.join(output_bucket.split('-')[1:-1]))
    # tibanna provides access to keys based on env and related settings
    tibanna = Tibanna(env, s3_keys=event.get('s3_keys'),
                      ff_keys=event.get('ff_keys'), settings=tibanna_settings)
    LOG.info("input data is %s" % event)

    # represents the SBG info we need
    sbg = sbg_utils.create_sbg_workflow(app_name, tibanna.sbg_keys)
    LOG.info("sbg is %s" % sbg.__dict__)

    # represents the workflow metadata to be stored in fourfront
    parameters, _ = sbg_utils.to_sbg_workflow_args(parameter_dict, vals_as_string=True)

    # get argument format & type info from workflow
    workflow_info = ff_utils.get_metadata(workflow_uuid, key=tibanna.ff_keys)
    LOG.info("workflow info %s" % workflow_info)
    if 'error' in workflow_info.get('@type', []):
        raise Exception("FATAL, can't look up workflow info for %s on fourfront" % workflow_uuid)
    # This dictionary has a key 'arguments' with a value
    # { 'workflow_argument_name': ..., 'argument_type': ..., 'argument_format': ... }

    # get format-extension map
    try:
        fp_schema = ff_utils.get_metadata("profiles/file_processed.json", key=tibanna.ff_keys)
        fe_map = fp_schema.get('file_format_file_extension')
    except Exception as e:
        LOG.error("Can't get format-extension map from file_processed schema. %s\n" % e)

    # processed file metadata
    output_files = []
    try:
        if 'arguments' in workflow_info:
            pf_meta = []
            for arg in workflow_info.get('arguments'):
                if (arg.get('argument_type') in
                        ['Output processed file', 'Output report file', 'Output QC file']):
                    of = dict()
                    of['workflow_argument_name'] = arg.get('workflow_argument_name')
                    of['type'] = arg.get('argument_type')
                    if 'argument_format' in arg:
                        # These are not processed files but report or QC files.
                        pf = ff_utils.ProcessedFileMetadata(file_format=arg.get('argument_format'))
                        try:
                            # actually post processed file metadata here
                            resp = pf.post(key=tibanna.ff_keys)
                            resp = resp.get('@graph')[0]
                            of['upload_key'] = resp.get('upload_key')
                            of['value'] = resp.get('uuid')
                        except Exception as e:
                            LOG.error("Failed to post Processed file metadata. %s\n" % e)
                            LOG.error("resp" + str(resp) + "\n")
                            raise e
                        of['format'] = arg.get('argument_format')
                        of['extension'] = fe_map.get(arg.get('argument_format'))
                        pf_meta.append(pf)
                    output_files.append(of)
    except Exception as e:
        LOG.error("output_files = " + str(output_files) + "\n")
        LOG.error("Can't prepare output_files information. %s\n" % e)
        raise e

    # create the ff_meta output info
    input_files = []
    for input_file in input_file_list:
        for idx, uuid in enumerate(ensure_list(input_file['uuid'])):
            input_files.append({'workflow_argument_name': input_file['workflow_argument_name'],
                                'value': uuid, 'ordinal': idx + 1})
    LOG.info("input_files is %s" % input_files)

    ff_meta = ff_utils.create_ffmeta(sbg, workflow_uuid, input_files, parameters,
                                     run_url=tibanna.settings.get('url', ''),
                                     output_files=output_files)
    LOG.info("ff_meta is %s" % ff_meta.__dict__)

    # store metadata so we know the run has started
    ff_meta.post(key=tibanna.ff_keys)

    # mount all input files to sbg; this will also update sbg to store the import_ids
    for infile in input_file_list:
        imps = mount_on_sbg(infile, tibanna.s3_keys, sbg)
        infile['import_ids'] = imps

    # create a link to the output directory as well
    if output_bucket:
        sbg_volume = sbg_utils.create_sbg_volume_details()
        res = sbg.create_volumes(sbg_volume, output_bucket,
                                 public_key=tibanna.s3_keys['key'],
                                 secret_key=tibanna.s3_keys['secret'])
        vol_id = res.get('id')
        if not vol_id:  # we got an error
            raise Exception("Unable to mount output volume, error is %s " % res)
        sbg.output_volume_id = vol_id

    # let's not pass keys in plain text parameters
    return {"input_file_args": input_file_list,
            "workflow": sbg.as_dict(),
            "ff_meta": ff_meta.as_dict(),
            'pf_meta': [meta.as_dict() for meta in pf_meta],
            "_tibanna": tibanna.as_dict(),
            "parameter_dict": parameter_dict}
def test_ensure_list():
    assert ensure_list(5) == [5]
    assert ensure_list('hello') == ['hello']
    assert ensure_list(['hello']) == ['hello']
    assert ensure_list({'a': 'b'}) == [{'a': 'b'}]
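# ensure_list itself is not shown in this section; a minimal implementation consistent
# with the tests above would be:
def ensure_list(val):
    """Return val unchanged if it is already a list; otherwise wrap it in a one-element list."""
    if isinstance(val, list):
        return val
    return [val]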