def main():
    rx = Reactor()
    m = rx.context.message_dict
    job = PipelineJob(rx, 'transcriptic', 'Yeast-Gates',
                      'sample.transcriptic.aq1btsj94wghbk',
                      'measurement.transcriptic.sample.transcriptic.aq1btsj94wghbk.2')
    job.setup(data=m)

    # Set up and launch an Agave job with callbacks based on job.callback
    job_def = {
        'appId': 'hello-agave-cli-0.1.0u1',
        'notifications': [{
            'event': '*',
            'persistent': False,
            'url': job.callback + '&status=${STATUS}'
        }]
    }

    try:
        resp = rx.client.jobs.submit(body=job_def)
        job_id = None
        if 'id' in resp:
            job_id = resp['id']
            job.running({'launched': job_id})
        else:
            job.fail()
    except Exception as exc:
        job.cancel()
        rx.on_failure('Failed to launch pipeline', exc)
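
# A minimal sketch of how the wildcard notification above behaves: at each
# state change, Agave substitutes the job status into the callback URL.
# This helper is a hypothetical stand-in for that server-side templating,
# shown only for clarity; it is not part of the original module.
def expand_callback(callback_base, status):
    """Emulate Agave's ${STATUS} macro expansion in a notification URL."""
    template = callback_base + '&status=${STATUS}'
    return template.replace('${STATUS}', status)

# expand_callback('https://<tenant>/actors/v2/<actor>/messages?x-nonce=abc', 'FINISHED')
# -> 'https://<tenant>/actors/v2/<actor>/messages?x-nonce=abc&status=FINISHED'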
def main():
    rx = Reactor()
    mes = AttrDict(rx.context.message_dict)
    rx.logger.info('raw_message: {}'.format(rx.context.raw_message))
    if mes == {}:
        try:
            jsonmsg = json.loads(rx.context.raw_message)
            mes = jsonmsg
        except Exception:
            pass

    # ['event', 'agavejobs', 'index', 'indexed']
    action = "urlparams"
    try:
        for a in ["index", "indexed"]:
            try:
                rx.logger.debug("Checking against schema {}".format(a))
                rx.validate_message(
                    mes,
                    messageschema="/schemas/" + a + ".jsonschema",
                    permissive=False)
                action = a
                break
            except Exception as exc:
                print("Validation error: {}".format(exc))
        if action is None:
            pprint(mes)
            raise ValidationError("Unknown schema")
    except Exception as vexc:
        rx.on_failure("Message was not processed", vexc)

    rx.logger.debug("Schema: {}".format(action))

    # for k, v in os.environ.items():
    #     rx.logger.debug("env:{}={}".format(k, v))

    PARAMS = [
        ("uuid", "uuid", None),
        ("token", "token", None),
        ("level", "level", "1"),
        ("filters", "filters", None),
    ]

    # Look in the message, then in context, then in environment for values
    cb = dict()
    try:
        for param, key, default in PARAMS:
            cb[key] = mes.get(
                param, rx.context.get(param, os.environ.get(param, default)))
            rx.logger.debug("param:{}={}".format(param, cb[key]))
    except Exception as exc:
        rx.on_failure("Failed to resolve parameters", exc)

    # Transform the JSON string representation of filters so they can be
    # used as Python regexes. This works for filters passed in the message
    # but not as a URL parameter.
    # TODO implement urldecode on ?filters parameter
    parsed_filters = cb["filters"]
    # if cb["filters"] is not None:
    #     for f in cb["filters"]:
    #         parsed_filters.append(unquote(f))
    #     cb["filters"] = parsed_filters

    rx.logger.info('Processing event {0} for {1}'.format(action, cb['uuid']))

    # Simple case - we're just processing 'indexed'
    if action == "indexed":
        rx.logger.info('Indexed job {}'.format(cb['uuid']))
        try:
            store = ManagedPipelineJobInstance(rx.settings.mongodb,
                                               uuid=cb['uuid'],
                                               agave=rx.client)
            store_state = store.state
            last_event = store.last_event
            # Notify the events manager that we are about to process an
            # 'indexed' event
            if rx.settings.state_enter:
                forward_event(cb['uuid'], 'indexed', store_state,
                              {"last_event": last_event}, rx)
            # This is where the actual indexed event is handled
            # (Job.state is updated and history amended)
            resp = store.indexed(token=cb["token"])
            # Notify the events manager that we processed an 'indexed' event
            if rx.settings.state_exit:
                forward_event(cb['uuid'], 'indexed', resp['state'],
                              {"last_event": resp['last_event']}, rx)
            rx.on_success('Processed indexed event for {0}'.format(cb['uuid']))
        except Exception as mexc:
            rx.on_failure('Failed to handle indexed event', mexc)

    if action in ["index", "urlparams"]:
        rx.logger.info('Indexing job {}'.format(cb['uuid']))
        try:
            store = ManagedPipelineJobInstance(rx.settings.mongodb,
                                               agave=rx.client,
                                               uuid=cb['uuid'])
            # TODO - Pass in generated_by=config#pipelines.process_uuid
            # Notify the events manager that we got an 'index' event
            store_state = store.state
            last_event = store.last_event
            if rx.settings.state_enter:
                forward_event(cb['uuid'], 'index', 'INDEXING',
                              {"last_event": last_event}, rx)
            resp = store.index(
                token=cb["token"],
                transition=False,
                filters=cb["filters"],
                generated_by=[rx.settings.pipelines.process_uuid],
            )
            if rx.settings.state_exit:
                # Because the index handler returns a list, we have to query
                # the job in order to know its state and last event
                updated_store = ManagedPipelineJobInstance(
                    rx.settings.mongodb, agave=rx.client, uuid=cb['uuid'])
                forward_event(cb['uuid'], 'index', updated_store.state,
                              {"last_event": updated_store.last_event}, rx)
            # rx.logger.info('store.index response was: {}'.format(resp))
            if isinstance(resp, list):
                rx.logger.info(
                    "Indexed {} files to PipelineJob {}. ({} usec)".format(
                        len(resp), cb["uuid"], rx.elapsed()))
                # Send the 'indexed' event to the job via PipelineJobsManager
                # (not PipelineJobsIndexer!). This requires two messages to
                # move a job from INDEXING to FINISHED but allows the
                # jobs-manager to subscribe to and act on the indexed event.
                try:
                    # resp = store.indexed(token=cb["token"])
                    if rx.settings['standalone'] is True:
                        job_manager_id = rx.uid
                        mgr_mes = {'uuid': cb['uuid'], 'name': 'indexed'}
                    else:
                        job_manager_id = rx.settings.pipelines.job_manager_id
                        mgr_mes = {
                            'uuid': cb['uuid'],
                            'name': 'indexed',
                            'data': {
                                'source': 'jobs-manager.prod'
                            }
                        }
                    rx.send_message(job_manager_id, mgr_mes,
                                    retryMaxAttempts=10)
                    rx.on_success('Sent indexed event for {0}'.format(
                        cb['uuid']))
                except Exception as mexc:
                    rx.on_failure('Failed to send indexed event', mexc)
            else:
                rx.logger.info("Indexed and transitioned to {0}".format(
                    resp.get("state", "Unknown")))
        except Exception as iexc:
            rx.on_failure("Failed to accomplish indexing", iexc)
    else:
        rx.on_failure("Failed to interpret indexing request")
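
# A minimal sketch of the URL-decode step the TODO above calls for, based on
# the commented-out unquote() loop. The helper name is hypothetical, not
# part of the original module; it assumes filters arrive as a list of
# percent-encoded strings.
from urllib.parse import unquote

def decode_filters(filters):
    """Percent-decode each filter so it can be compiled as a Python regex."""
    if filters is None:
        return None
    return [unquote(f) for f in filters]

# decode_filters(['sample%2Etranscriptic%2E.*']) -> ['sample.transcriptic..*']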
def main():
    r = Reactor()
    m = AttrDict(r.context.message_dict)
    # ! This code fixes an edge case and will be moved lower in the stack
    if m == {}:
        try:
            jsonmsg = json.loads(r.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    if not r.validate_message(m):
        r.on_failure('Invalid message received', None)

    agave_uri = m.get('uri')
    generated_by = m.get('generated_by', [])
    r.logger.info('Indexing {}'.format(agave_uri))

    agave_sys, agave_path, agave_file = agaveutils.from_agave_uri(agave_uri)
    agave_full_path = os.path.join(agave_path, agave_file)
    agave_full_path = re.sub('^(/)+', '/', agave_full_path)

    ah = AgaveHelper(client=r.client)
    to_index = []
    if ah.isfile(agave_full_path):
        # INDEX THE FILE
        mgr = Indexer(mongodb=r.settings.mongodb, agave=r.client)
        try:
            mgr.index_if_exists(agave_full_path, storage_system=agave_sys)
        except Exception as exc:
            r.on_failure('Indexing failed for {}'.format(agave_uri), exc)
        # file_store = mgr.stores['file']
        # fixity_store = mgr.stores['fixity']
        # try:
        #     resp = fixity_store.index(agave_full_path,
        #                               storage_system=agave_sys,
        #                               generated_by=generated_by)
        #     r.logger.debug('Fixity indexed {} to uuid:{}'.format(
        #         os.path.basename(agave_uri), resp.get('uuid', None)))
        # except Exception as exc:
        #     r.on_failure('Indexing failed for {}'.format(agave_full_path), exc)
    else:
        # LIST DIR AND FIRE OFF INDEX TASKS
        r.logger.debug('Recursively listing {}'.format(agave_full_path))
        to_index = ah.listdir(agave_full_path,
                              recurse=True,
                              storage_system=agave_sys,
                              directories=False)

        r.logger.info('Found {} files to index'.format(len(to_index)))
        r.logger.debug('Messaging self with indexing targets')

        # Contents of to_index are likely to be in sorted order. Shuffling
        # spreads the indexing load evenly over all indexing targets.
        shuffle(to_index)
        batch_sub = 0
        for idxpath in to_index:
            try:
                r.logger.debug('Self, please index {}'.format(idxpath))
                if r.local is False:
                    actor_id = r.uid
                    message = {
                        'uri': 'agave://' + agave_sys + '/' + idxpath,
                        'generated_by': generated_by,
                        '__options': {
                            'parent': agave_uri
                        }
                    }
                    resp = r.send_message(actor_id, message,
                                          retryMaxAttempts=3)
                    batch_sub += 1
                    if batch_sub > r.settings.batch.size:
                        batch_sub = 0
                        if r.settings.batch.randomize_sleep:
                            sleep(random() * r.settings.batch.sleep_duration)
                        else:
                            sleep(r.settings.batch.sleep_duration)
                    if 'executionId' in resp:
                        r.logger.debug(
                            'Dispatched indexing task for {} in execution {}'.
                            format(idxpath, resp['executionId']))
            except Exception as exc:
                r.logger.critical(
                    'Failed to launch indexing task for {}: {}'.format(
                        idxpath, exc))
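
# Illustrative shape of the self-message this reactor sends per file. The
# URI and generated_by values are hypothetical; '__options.parent' records
# the originating directory URI, as built in the loop above.
EXAMPLE_INDEX_MESSAGE = {
    'uri': 'agave://data-sd2e-community/sample/path/file.csv',
    'generated_by': ['<process_uuid>'],
    '__options': {'parent': 'agave://data-sd2e-community/sample/path'}
}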
def main():
    rx = Reactor()
    m = AttrDict(rx.context.message_dict)
    if m == {}:
        try:
            jsonmsg = json.loads(rx.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    # ['event', 'agavejobs', 'create', 'delete']
    action = "emptypost"
    try:
        for a in ["aloejobs", "event", "agavejobs"]:
            try:
                rx.logger.info("Testing against {} schema".format(a))
                rx.validate_message(
                    m,
                    messageschema="/schemas/" + a + ".jsonschema",
                    permissive=False)
                action = a
                break
            except Exception as exc:
                print("Validation error: {}".format(exc))
        if action is None:
            pprint(m)
            raise ValidationError("Message did not match a known schema")
    except Exception as vexc:
        rx.on_failure("Failed to process message", vexc)

    # rx.logger.debug("SCHEMA DETECTED: {}".format(action))

    # store = PipelineJobStore(mongodb=rx.settings.mongodb)

    # Process the event

    # Get URL params from the Abaco context
    #
    # These can be overridden by the event body or by custom code implemented
    # to process the message. This has the side effect of allowing the
    # manager to process empty POST bodies, so long as the right values are
    # presented as URL params.
    #
    # cb_* variables are always overridden by the contents of the POST body
    cb_event_name = rx.context.get("event", None)
    cb_job_uuid = rx.context.get("uuid", None)
    cb_token = rx.context.get("token", "null")
    # Accept a 'note' as a URL parameter
    # TODO - urldecode the contents of 'note'
    cb_note = rx.context.get("note", "Event had no JSON payload")
    # NOTE - contents of cb_data will be overridden in 'event' and 'aloejobs'
    # handling below
    cb_data = {"note": cb_note}
    # Accept 'status', the Aloe-centric name for job.state, as well as 'state'
    cb_agave_status = rx.context.get("status", rx.context.get("state", None))

    # Prepare the template PipelineJobsEvent
    event_dict = {
        "uuid": cb_job_uuid,
        "name": cb_event_name,
        "token": cb_token,
        "data": cb_data,
    }

    # This is the default message schema 'event'
    if action == "event":
        # Filter the message and override values in event_dict with its
        # contents
        for k in ["uuid", "name", "token", "data"]:
            event_dict[k] = m.get(k, event_dict.get(k))

    # AgaveJobs can update the status of an existing job but cannot create
    # one. To do so, an Agave job must be launched using the
    # PipelineJobsAgaveProxy resource.
    if action == "agavejobs":
        rx.on_failure("Agave job callbacks are no longer supported")
    elif action == "aloejobs":
        try:
            # Aloe jobs POST their current JSON representation to callback
            # URL targets. The POST body contains a 'status' key. If for some
            # reason it doesn't, job status is determined by the 'state' or
            # 'status' URL parameter.
            if cb_agave_status is None:
                cb_agave_status = m.get("status", None)
            # Agave job message bodies include 'id', which is the jobId
            mes_agave_job_id = m.get("id", None)
            rx.logger.debug("aloe_status: {}".format(cb_agave_status))
            if cb_agave_status is not None:
                cb_agave_status = cb_agave_status.upper()
        except Exception as exc:
            rx.on_failure(
                "Aloe callback POST and associated URL parameters were "
                "missing some required fields", exc)

        # If the job status is 'RUNNING', use a subset of the POST for
        # event.data. Otherwise, create event.data from the most recent
        # entry in the Agave job history. One small detail to note: because
        # callbacks are sent at the beginning of event processing in the
        # Agave jobs service, a handful of late-bound fields in the job
        # record are not yet populated when the event is sent.
        if cb_agave_status == "RUNNING":
            cb_data = minify_job_dict(dict(m))
        else:
            cb_data = {"status": cb_agave_status}
            # Fetch the latest history entry to put in event.data
            try:
                # Is there a better way than grabbing the entire history
                # that can be implemented in a pure Agave call?
                # Alternatively, we could cache the last offset for this job
                # in rx.state, but that would limit our scaling to one worker
                agave_job_latest_history = rx.client.jobs.getHistory(
                    jobId=mes_agave_job_id,
                    limit=100)[-1].get("description", None)
                if agave_job_latest_history is not None:
                    cb_data["description"] = agave_job_latest_history
            except Exception as agexc:
                rx.logger.warning("Failed to get history for {}: {}".format(
                    mes_agave_job_id, agexc))

        # Map the Agave job status to a PipelineJobsEvent name
        if cb_event_name is None and cb_agave_status is not None:
            cb_event_name = AgaveEvents.agavejobs.get(cb_agave_status,
                                                      "update")
            rx.logger.debug("Status: {} => Event: {}".format(
                cb_agave_status, cb_event_name))

        # Event name and data can be updated as part of processing an Agave
        # POST, so apply the current values to event_dict here
        event_dict["name"] = cb_event_name
        event_dict["data"] = cb_data

    # Sanity check event_dict and token
    if (event_dict["uuid"] is None or event_dict["name"] is None
            or cb_token is None):
        rx.on_failure("No actionable event was received.")

    # Instantiate a job instance to leverage the MPJ framework
    store = ManagedPipelineJobInstance(rx.settings.mongodb,
                                       event_dict["uuid"],
                                       agave=rx.client)

    # Handle the event...
    try:
        # First, proxy events. This code forwards 'index' and 'indexed'
        # events to the jobs-indexer.
        #
        # Proxy 'index'
        if event_dict["name"] == "index":
            rx.logger.info("Forwarding 'index'")
            index_mes = {
                "name": "index",
                "uuid": event_dict["uuid"],
                "token": event_dict["token"],
            }
            rx.send_message(rx.settings.pipelines.job_indexer_id,
                            index_mes,
                            retryMaxAttempts=10)
            # Disabled since it should be picked up via events-manager
            # subscription
            # message_control_annotator(up_job, ["INDEXING"], rx)
        # Proxy 'indexed'
        elif event_dict["name"] == "indexed":
            rx.logger.info("Forwarding 'indexed'")
            index_mes = {
                "name": "indexed",
                "uuid": event_dict["uuid"],
                "token": event_dict["token"],
            }
            rx.send_message(rx.settings.pipelines.job_indexer_id,
                            index_mes,
                            retryMaxAttempts=10)
            # Disabled since it should be picked up via events-manager
            # subscription
            # message_control_annotator(up_job, ["FINISHED"], rx)
        # Handle all other events
        else:
            rx.logger.info("Handling '{}'".format(event_dict["name"]))
            # Get the current state of the MPJ. We use this to detect whether
            # handling the event has resulted in a change of state
            store_state = store.state
            last_event = store.last_event
            # Send an event at the beginning of the state change so
            # subscribers can pick up, for instance, a case where the job
            # receives an index event while in the FINISHED state
            if rx.settings.state_enter:
                forward_event(event_dict["uuid"], event_dict['name'],
                              store_state, {'last_event': last_event}, rx)
            up_job = store.handle(event_dict, cb_token)
            if rx.settings.state_exit:
                forward_event(up_job["uuid"], event_dict['name'],
                              up_job["state"],
                              {"last_event": up_job["last_event"]}, rx)
    except Exception as exc:
        rx.on_failure("Event not processed", exc)

    rx.on_success("Processed event in {} usec".format(rx.elapsed()))
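
# Illustrative, minimal 'event' message this manager accepts. All values
# are hypothetical; 'name' must be a valid PipelineJobsEvent name ('index'
# and 'indexed' are shown in the code above, others are assumptions here.
EXAMPLE_EVENT_MESSAGE = {
    'uuid': '1073f4ff-c2b9-5190-bd9a-e6a406d9796a',
    'name': 'indexed',
    'token': '0dc73dc3ff39b49a',
    'data': {'note': 'processing complete'}
}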
def main():
    r = Reactor()
    m = AttrDict(r.context.message_dict)
    if m == {}:
        try:
            print(r.context.raw_message)
            jsonmsg = json.loads(r.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    # Allow passed vars to override
    token = os.environ.get('token', None)
    uuid = os.environ.get('uuid', None)
    if uuid:
        action = os.environ.get('action', 'get')
    else:
        action = None

    try:
        for a in ['create']:
            try:
                schema_file = '/schemas/' + a + '.jsonschema'
                r.validate_message(m,
                                   messageschema=schema_file,
                                   permissive=False)
                action = a
                break
            except Exception as exc:
                if action not in ('enable', 'disable'):
                    r.logger.debug('Validation to "{0}" failed: {1}\n'.format(
                        a, exc))
        if action is None:
            raise ValidationError('Message did not match any known schema')
    except Exception as vexc:
        r.on_failure('Failed to process message', vexc)

    r.logger.debug('Action selected: {}'.format(action))

    # Set up Store objects
    pipe_store = PipelineStore(mongodb=r.settings.mongodb)

    if action == 'get':
        resp = pipe_store.find_one_by_uuid(uuid)
        r.logger.info(resp)
        r.on_success('Exists and was printed to execution log')

    if action == 'create':
        create_dict = copy.deepcopy(m)
        try:
            if uuid and token:
                r.logger.info('Replacing {}'.format(uuid))
                pipeline = pipe_store.add_update_document(
                    create_dict,
                    uuid=uuid,
                    token=token,
                    strategy=strategies.REPLACE)
            else:
                r.logger.info('Creating pipeline...')
                pipeline = pipe_store.add_update_document(create_dict)
            r.on_success('Wrote pipeline {}; Update token: {}'.format(
                pipeline['uuid'], pipeline['_update_token']))
        except Exception as exc:
            r.on_failure('Write failed', exc)

    if action == 'disable':
        try:
            r.logger.info('Disabling pipeline {}'.format(uuid))
            resp = pipe_store.delete_document(uuid, token, force=False)
            r.on_success('Success')
        except Exception as exc:
            r.on_failure('Disable failed', exc)

    if action == 'enable':
        try:
            r.logger.info('Enabling pipeline {}'.format(uuid))
            resp = pipe_store.undelete(uuid, token)
            r.on_success('Success')
        except Exception as exc:
            r.on_failure('Enable failed', exc)
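
# Hypothetical invocation patterns for the manager above. Abaco surfaces
# URL query parameters as environment variables, so management actions need
# no JSON body (parameter values are illustrative):
#
#   ?uuid=<pipeline_uuid>&action=get
#   ?uuid=<pipeline_uuid>&action=disable&token=<update_token>
#   ?uuid=<pipeline_uuid>&action=enable&token=<update_token>
#
# A 'create' request instead POSTs a JSON body that validates against
# /schemas/create.jsonschema.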
def main():
    def on_failure(message, exception):
        # if r.settings.pipelines.active:
        #     job.fail(message)
        r.on_failure(message, exception)

    def on_success(message):
        # if r.settings.pipelines.active:
        #     job.finish(message)
        r.on_success(message)

    r = Reactor()
    m = AttrDict(r.context.message_dict)
    # ! This code fixes an edge case and will be moved lower in the stack
    if m == {}:
        try:
            jsonmsg = json.loads(r.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    # Use the JSONschema-based message validator
    if not r.validate_message(m):
        r.on_failure('Invalid message received', ValueError())

    # Process options. Eventually move this into a Reactor method.
    # May need to add a filter to prevent some things from being overwritten
    options_settings = {}
    if '__options' in m:
        # Allow override of settings
        try:
            options_settings = m.get('__options', {}).get('settings', {})
            if isinstance(options_settings, dict):
                options_settings = AttrDict(options_settings)
            r.settings = r.settings + options_settings
        except Exception as exc:
            on_failure('Failed to handle options', exc)

    agave_uri = m.get('uri')
    agave_sys, agave_path, agave_file = \
        datacatalog.agavehelpers.from_agave_uri(agave_uri)
    agave_full_path = os.path.join(agave_path, agave_file)

    # if r.settings.pipelines.active:
    #     job = datacatalog.managers.pipelinejobs.ManagedPipelineJob(
    #         r.settings.mongodb,
    #         r.settings.pipelines,
    #         instanced=False,
    #         archive_path=agave_path
    #     )
    #     job.setup().run({'Processing': agave_uri})

    # r.logger.debug('Downloading file')
    # LOCALFILENAME = r.settings.downloaded
    # try:
    #     bacanora.download(r.client, agave_full_path, LOCALFILENAME,
    #                       agave_sys)
    # except Exception as exc:
    #     # job.fail('Download failed')
    #     on_failure('Failed to download {}'.format(agave_file), exc)

    # TODO - Add optional validation of file references before loading data
    try:
        r.logger.debug(
            'Initializing SampleSetProcessor with {}'.format(r.client))
        db = datacatalog.managers.sampleset.SampleSetProcessor(
            r.settings.mongodb,
            agave=r.client,
            samples_uri=agave_uri,
            path_prefix=agave_path).setup()

        # Validate the downloaded file
        # (optional, controlled by config.yml#validate)
        if r.settings.validate:
            r.logger.debug('Validating {}'.format(agave_file))
            try:
                # resolve_remote() fetches the schema document itself
                schema = jsonschema.RefResolver('', '').resolve_remote(
                    SCHEMA_URI)
                instance = json.load(open(agave_file, 'r'))
                assert jsonschema.validate(
                    instance, schema,
                    format_checker=formatChecker()) is None
            except Exception as exc:
                on_failure(
                    'Failed to validate metadata file {}'.format(agave_file),
                    exc)

        r.logger.debug('Now calling SampleSetProcessor.process()')
        dbp = db.process()
        assert dbp is True
    except Exception as exc:
        on_failure('Ingest failed for {}'.format(agave_file), exc)

    if not r.local:
        r.loggers.slack.info(':mario_star: Ingested {} ({} usec)'.format(
            agave_uri, r.elapsed()))
    on_success('Ingest complete for {} ({} usec)'.format(
        agave_uri, r.elapsed()))
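
# Illustrative message showing the '__options' settings override handled
# above. The URI is hypothetical; 'validate' is a real setting consulted by
# this reactor (r.settings.validate).
EXAMPLE_INGEST_MESSAGE = {
    'uri': 'agave://data-sd2e-community/uploads/samples.json',
    '__options': {'settings': {'validate': False}}
}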
def main():
    # Minimal Message Body:
    # { "uri": "s3://uploads/path/to/target.txt"}

    r = Reactor()
    m = AttrDict(r.context.message_dict)
    # ! This code fixes an edge case and will be moved lower in the stack
    if m == {}:
        try:
            jsonmsg = json.loads(r.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    # Use the JSONschema-based message validator
    if not r.validate_message(m):
        r.on_failure('Message was invalid', None)

    # Rename m.Key so it makes semantic sense elsewhere in the code
    s3_uri = m.get('uri')
    if s3_uri.endswith('/'):
        s3_uri = s3_uri[:-1]
    only_sync = m.get('sync', True)
    generated_by = m.get('generated_by', [])
    r.logger.info('Received S3 URI {}'.format(s3_uri))

    sh = S3Helper()
    ah = AgaveHelper(r.client)

    # Map POSIX source and destination
    s3_bucket, srcpath, srcfile = sh.from_s3_uri(s3_uri)
    # print(s3_bucket, srcpath, srcfile)
    s3_full_relpath = os.path.join(s3_bucket, srcpath, srcfile)

    if r.settings.safen_paths:
        # Munge out unicode characters on upload. The default for
        # safen_path also transforms spaces into the hyphen character
        ag_full_relpath = safen_path(s3_full_relpath,
                                     no_unicode=True,
                                     no_spaces=True)
        if ag_full_relpath != s3_full_relpath:
            r.logger.warning('Safened path: {} => {}'.format(
                s3_full_relpath, ag_full_relpath))
    else:
        ag_full_relpath = s3_full_relpath

    ag_uri = 'agave://data-sd2e-community/' + ag_full_relpath
    r.logger.info('Generated Tapis resource: {}'.format(ag_uri))

    posix_src = sh.mapped_catalog_path(s3_full_relpath)
    posix_dest = ah.mapped_posix_path(os.path.join('/', ag_full_relpath))
    # agave_full_path = agave_dest
    r.logger.debug('POSIX src: {}'.format(posix_src))
    r.logger.debug('POSIX dst: {}'.format(posix_dest))

    def cmpfiles(posix_src, posix_dest, mtime=True, size=True, cksum=False):
        # Existence
        if not os.path.exists(posix_dest):
            return False
        if not os.path.exists(posix_src):
            return False
        # Both files exist, so read in POSIX stat
        stat_src = os.stat(posix_src)
        stat_dest = os.stat(posix_dest)
        # Modification time (conditional)
        if mtime:
            # Mtime on the source should never be more recent than on the
            # destination, as the destination is the result of a copy
            # operation. We might need to add the ability to account for
            # clock skew, but at present we assume source and destination
            # filesystems are managed by the same host
            if stat_src.st_mtime > stat_dest.st_mtime:
                return False
        # Size (conditional)
        if size:
            if stat_src.st_size != stat_dest.st_size:
                return False
        if cksum:
            # Not implemented
            # TODO Implement a very fast hasher instead of sha256 for sync
            # 1. https://github.com/kalafut/py-imohash
            # 2. https://pypi.org/project/xxhash/
            raise NotImplementedError(
                'Checksum comparison is not yet implemented')
        # None of the False tests fired, so we can safely return True
        return True

    to_process = list()
    # Is the source physically a FILE?
    if sh.isfile(posix_src):
        # If in sync mode, check whether source and destination differ
        if only_sync is True and cmpfiles(posix_src, posix_dest, mtime=False):
            # if os.path.exists(posix_dest) and only_sync is True:
            r.logger.debug('Compared: src == dest {}, {}'.format(
                posix_src, posix_dest))
        else:
            # Files differ (or sync mode is off) - overwrite the destination
            # with the source
            r.logger.debug('Compared: src != dest {}, {}'.format(
                posix_src, posix_dest))
            copyfile(r, posix_src, posix_dest, ag_uri)
            routemsg(r, ag_uri)
    elif sh.isdir(posix_src):
        # It's a directory. Recurse through it and launch file messages
        # to self
        r.logger.debug('Directory found: {}'.format(posix_src))
        to_process = sh.listdir(posix_src,
                                recurse=True,
                                bucket=s3_bucket,
                                directories=False)
        pprint(to_process)
        r.logger.info('Sync tasks found: {}'.format(len(to_process)))

        # to_process is constructed in POSIX ls order. Shuffling spreads the
        # processing evenly over all files
        shuffle(to_process)
        batch_sub = 0
        for procpath in to_process:
            try:
                r.logger.debug('Processing {}'.format(procpath))
                # Here is the meat of the directory-syncing behavior
                posix_src = sh.mapped_catalog_path(procpath)
                posix_dest = ah.mapped_posix_path(
                    os.path.join('/', procpath))
                if (only_sync is False
                        or cmpfiles(posix_src, posix_dest,
                                    mtime=False) is False):
                    r.logger.info('Copying {}'.format(procpath))
                    actor_id = r.uid
                    resp = dict()
                    s3_msg_uri = 's3://' + procpath
                    message = {
                        'uri': s3_msg_uri,
                        'generated_by': generated_by,
                        'sync': only_sync
                    }
                    if r.local is False:
                        try:
                            r.logger.debug(
                                'Messaging {} with copy request'.format(
                                    actor_id))
                            resp = r.send_message(actor_id,
                                                  message,
                                                  retryMaxAttempts=3,
                                                  ignoreErrors=False)
                            if 'executionId' in resp:
                                r.logger.info('Message response: {}'.format(
                                    resp['executionId']))
                            else:
                                raise AgaveError('Message failed')
                        except Exception:
                            raise
                    else:
                        r.logger.debug(message)

                    batch_sub += 1
                    # Always sleep a little bit between task submissions
                    sleep(random() * r.settings.batch.task_sleep_duration)
                    # Sleep a little longer every N submissions
                    if batch_sub > r.settings.batch.size:
                        batch_sub = 0
                        if r.settings.batch.randomize_sleep:
                            sleep(random() * r.settings.batch.sleep_duration)
                        else:
                            sleep(r.settings.batch.sleep_duration)
                else:
                    r.logger.debug(
                        'Copy not required for {}'.format(procpath))
            except Exception as exc:
                r.logger.error('Copy operation failed for {}: {}'.format(
                    procpath, exc))
    else:
        r.on_failure('Process failed and {} was not synced'.format(posix_src))
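
# A minimal sketch of the fast checksum comparison that the cmpfiles TODO
# above leaves unimplemented, using the xxhash package its comment points
# to. The helper name and chunk size are illustrative, not part of the
# original module.
import xxhash

def files_match_checksum(path_a, path_b, chunk_size=1 << 20):
    """Return True when two files have identical streaming xxHash64 digests."""
    def digest(path):
        h = xxhash.xxh64()
        with open(path, 'rb') as handle:
            # Stream in fixed-size chunks so large files never load into RAM
            for chunk in iter(lambda: handle.read(chunk_size), b''):
                h.update(chunk)
        return h.hexdigest()
    return digest(path_a) == digest(path_b)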
def main():
    rx = Reactor()
    mes = AttrDict(rx.context.message_dict)
    mongodb_conn = rx.settings.mongodb

    # ! This code fixes an edge case in JSON serialization
    if mes == {}:
        try:
            jsonmsg = json.loads(rx.context.raw_message)
            mes = AttrDict(jsonmsg)
        except Exception as exc:
            rx.on_failure('Failed to load JSON from message', exc)

    # Check the incoming message against the default JSON schema
    try:
        rx.validate_message(mes, permissive=False)
    except Exception as exc:
        rx.on_failure('Failed to validate message against schema', exc)

    # Verify that appId is known to the Agave apps API. This requires that
    # the invoking user has a tenant admin role unless the appId is public
    agave_job = mes.get('job_definition')
    agave_appid = agave_job.get('appId')
    agave_app_details = None
    job_params = mes.get('parameters')
    instanced_archive_path = mes.get('instanced', True)
    rx.logger.info(
        'Received request to manage execution of {}'.format(agave_appid))
    try:
        agave_app_details = rx.client.apps.get(appId=agave_appid)
    except HTTPError as http_err:
        rx.on_failure(
            '{} is not a known Agave application'.format(agave_appid),
            http_err)
    except Exception as generic_exception:
        rx.on_failure('Failed to look up Agave application',
                      generic_exception)

    # Look up the Pipeline record for this Agave appId.
    #
    # Note that this requires a convention where the standalone Agave app is
    # registered in the Pipelines system with pipeline.id == agave.app.id
    pipeline_uuid = None
    try:
        manager_stores = Manager.init_stores(mongodb_conn)
        pipeline_rec = manager_stores['pipeline'].find_one_by_id(
            id=agave_appid)
        if pipeline_rec is None:
            raise ValueError("No 'pipelines' record found in database")
        else:
            pipeline_uuid = pipeline_rec.get('uuid')
    except Exception as generic_exception:
        rx.on_failure(
            'Failed to resolve appId {} to a Pipeline record'.format(
                agave_appid), generic_exception)

    def cancel_job(message='an error occurred', exception=None):
        """Helper function to cancel a failed job
        """
        fmt_message = 'PipelineJob {} canceled because {}'.format(
            job_uuid, message)
        try:
            job.cancel()
        except Exception as job_cancel_exception:
            rx.logger.warning(
                'Failed to cancel PipelineJob {} because {}'.format(
                    job_uuid, job_cancel_exception))
        rx.on_failure(fmt_message, exception)

    def fail_job(message='an error occurred', exception=None):
        """Helper function to fail a job
        """
        fmt_message = 'PipelineJob {} failed because {}'.format(
            job_uuid, message)
        try:
            job.fail(data={'message': message})
        except Exception as job_fail_exception:
            rx.logger.warning(
                'Unable to update PipelineJob state for {} because {}'.format(
                    job_uuid, job_fail_exception))
        rx.on_failure(fmt_message, exception)

    # Initialize the ManagedPipelineJob. It will be in the jobs collection
    # with a status of CREATED.
    job = None
    job_uuid = None
    rx.logger.info('Building initial job.data')
    init_data = agave_job
    mes_data = mes.get('data', {})
    for k, v in job_params.items():
        if v is not None and isinstance(v, str):
            init_data[k] = v
    # init_data = {**init_data, **mes_data}

    try:
        job = ManagedPipelineJob(
            rx.settings.mongodb,
            rx.settings.pipelines.job_manager_id,
            rx.settings.pipelines.updates_nonce,
            pipeline_uuid=pipeline_uuid,
            data=init_data,
            session=rx.nickname,
            agent=rx.uid,
            task=rx.execid,
            instanced=instanced_archive_path,
            archive_path_patterns=mes.get('index_patterns', []),
            **job_params)
        job.setup(mes_data)
        job_uuid = job.uuid
    except Exception as generic_exception:
        if job is not None:
            cancel_job(message='Failed to set up ManagedPipelineJob',
                       exception=generic_exception)
        else:
            rx.on_failure('Failed to set up ManagedPipelineJob',
                          generic_exception)

    # Extend the incoming Agave job definition to update the PipelineJob.
    # Set the archivePath and archiveSystem from the ManagedPipelineJob.
    #
    # The former is accomplished by adding custom notifications built from
    # the job's 'callback' property, which was initialized on job.setup().
    # Any pre-existing notifications (email, other callbacks) are preserved.
    try:
        if 'notifications' not in agave_job:
            agave_job['notifications'] = list()
        # for event in ('SUBMITTING', 'STAGING_JOB', 'RUNNING', 'ARCHIVING',
        #               'ARCHIVING_FINIS', 'FINISHED', 'FAILED'):
        # Capture all Agave job states
        notification = {
            'event': '*',
            'persistent': True,
            'url': job.callback + '&status=${STATUS}&note=${JOB_ERROR}'
        }
        agave_job['notifications'].append(notification)
        notification = {
            'event': 'FINISHED',
            'persistent': False,
            'url': job.indexer_callback
        }
        agave_job['notifications'].append(notification)

        agave_job['archiveSystem'] = job.archive_system
        agave_job['archivePath'] = job.archive_path
        agave_job['archive'] = True
    except Exception as generic_exception:
        cancel_job(message='Failed to prepare Agave job definition',
                   exception=generic_exception)

    if rx.local:
        print(json.dumps(agave_job, indent=4))
        sys.exit(0)

    # Launch the Agave job
    agave_job_id = None
    try:
        resp = rx.client.jobs.submit(body=agave_job)
        if 'id' in resp:
            agave_job_id = resp['id']
        else:
            raise KeyError('Invalid response received from jobs.submit()')
    except HTTPError as h:
        http_err_resp = agaveutils.process_agave_httperror(h)
        fail_job(message='Encountered API error: {}'.format(http_err_resp),
                 exception=h)
    except Exception as job_submit_exception:
        fail_job(message='Failed to launch {}'.format(agave_appid),
                 exception=job_submit_exception)

    # Update the PipelineJob status
    #
    # This will create an entry in its history with an explicit link to the
    # job asset. If this doesn't succeed, we don't fail the job since the
    # expensive part (the Agave job) has already been submitted.
    try:
        job_uri = job.canonicalize_job(agave_job_id)
        job.run(data={'job_link': job_uri})
    except Exception as job_update_exception:
        rx.logger.warning(
            'Unable to update status of job {} because {}'.format(
                job_uuid, job_update_exception))

    # If no other exit state has been encountered, report success
    rx.on_success(
        'ManagedPipelineJob {} is managing Agave job {} ({} usec)'.format(
            job_uuid, agave_job_id, rx.elapsed()))
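
# Illustrative message this proxy accepts. The appId echoes one used
# earlier in this listing; the parameter name and other values are
# hypothetical. 'job_definition' is a standard Agave job definition, and
# string-valued 'parameters' are merged into job.data and forwarded to
# ManagedPipelineJob.
EXAMPLE_LAUNCH_MESSAGE = {
    'job_definition': {
        'appId': 'hello-agave-cli-0.1.0u1',
        'maxRunTime': '01:00:00'
    },
    'parameters': {'sample_id': 'sample.transcriptic.aq1btsj94wghbk'},
    'data': {},
    'instanced': True,
    'index_patterns': []
}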