def main():
    """Index a Tapis/Agave file, or fan out indexing tasks for a directory.

    Expects a message like ``{"uri": "agave://system/path", "generated_by": [...]}``.
    If the URI resolves to a file it is indexed directly; if it resolves to a
    directory, the listing is shuffled and one self-message per file is sent.
    """
    r = Reactor()
    m = AttrDict(r.context.message_dict)
    # ! This code fixes an edge case and will be moved lower in the stack
    if m == {}:
        try:
            jsonmsg = json.loads(r.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    if not r.validate_message(m):
        r.on_failure('Invalid message received', None)

    agave_uri = m.get('uri')
    generated_by = m.get('generated_by', [])
    r.logger.info('Indexing {}'.format(agave_uri))
    agave_sys, agave_path, agave_file = agaveutils.from_agave_uri(agave_uri)
    agave_full_path = os.path.join(agave_path, agave_file)
    # Collapse any run of leading slashes to a single one
    agave_full_path = re.sub('^(/)+', '/', agave_full_path)

    ah = AgaveHelper(client=r.client)
    to_index = []
    if ah.isfile(agave_full_path):
        # INDEX THE FILE
        mgr = Indexer(mongodb=r.settings.mongodb, agave=r.client)
        try:
            mgr.index_if_exists(agave_full_path, storage_system=agave_sys)
        except Exception as exc:
            # FIX: 'exc' was previously passed as an extra (ignored) argument
            # to str.format(), so on_failure never received the exception.
            r.on_failure('Indexing failed for {}'.format(agave_uri), exc)
    else:
        # LIST DIR AND FIRE OFF INDEX TASKS
        r.logger.debug('Recursively listing {}'.format(agave_full_path))
        to_index = ah.listdir(agave_full_path,
                              recurse=True,
                              storage_system=agave_sys,
                              directories=False)
        r.logger.info('Found {} files to index'.format(len(to_index)))
        r.logger.debug('Messaging self with indexing targets')

        # Contents of to_index are likely to be in a sorted order. Adding a
        # shuffle spreads the indexing process evenly over all indexing targets
        shuffle(to_index)
        batch_sub = 0
        for idxpath in to_index:
            try:
                r.logger.debug('Self, please index {}'.format(idxpath))
                if r.local is False:
                    actor_id = r.uid
                    message = {
                        'uri': 'agave://' + agave_sys + '/' + idxpath,
                        'generated_by': generated_by,
                        '__options': {
                            'parent': agave_uri
                        }
                    }
                    resp = r.send_message(actor_id,
                                          message,
                                          retryMaxAttempts=3)
                    batch_sub += 1
                    # Throttle: pause after every batch.size submissions so
                    # we do not flood the messaging system
                    if batch_sub > r.settings.batch.size:
                        batch_sub = 0
                        if r.settings.batch.randomize_sleep:
                            sleep(random() * r.settings.batch.sleep_duration)
                        else:
                            sleep(r.settings.batch.sleep_duration)
                    if 'executionId' in resp:
                        r.logger.debug(
                            'Dispatched indexing task for {} in execution {}'.
                            format(idxpath, resp['executionId']))
            except Exception as exc:
                # FIX: log the path that actually failed, not the parent target
                r.logger.critical(
                    'Failed to launch indexing task for {}: {}'.format(
                        idxpath, exc))
def main():
    """Process a PipelineJobs event callback (Aloe job POST, generic event, or URL params).

    Detects the message schema, builds a PipelineJobsEvent dict from the POST
    body and/or URL parameters, then either forwards index/indexed events to
    the jobs-indexer actor or hands the event to the MPJ state machine.
    """
    rx = Reactor()
    m = AttrDict(rx.context.message_dict)
    # Fallback: some senders deliver raw JSON rather than a parsed message_dict
    if m == {}:
        try:
            jsonmsg = json.loads(rx.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    # ['event', 'agavejobs', 'create', 'delete']
    # Default action when no schema matches; allows empty POST bodies to be
    # processed from URL parameters alone (see long comment below)
    action = "emptypost"
    try:
        for a in ["aloejobs", "event", "agavejobs"]:
            try:
                rx.logger.info("Testing against {} schema".format(a))
                rx.validate_message(m,
                                    messageschema="/schemas/" + a +
                                    ".jsonschema",
                                    permissive=False)
                action = a
                break
            except Exception as exc:
                print("Validation error: {}".format(exc))
        # NOTE(review): 'action' is initialized to "emptypost", never None,
        # so this branch appears unreachable — confirm intent. The error
        # string also reads oddly ("did not a known schema").
        if action is None:
            pprint(m)
            raise ValidationError("Message did not a known schema")
    except Exception as vexc:
        rx.on_failure("Failed to process message", vexc)

    # rx.logger.debug("SCHEMA DETECTED: {}".format(action))

    # store = PipelineJobStore(mongodb=rx.settings.mongodb)

    # Process the event

    # Get URL params from Abaco context
    #
    # These can be overridden by the event body or custom
    # code implemented to process the message. This has a
    # side effect of allowing the manager to process empty
    # POST bodies so long as the right values are presented
    # as URL params.
    #
    # cb_* variables are always overridden by the contents of
    # the POST body
    #
    cb_event_name = rx.context.get("event", None)
    cb_job_uuid = rx.context.get("uuid", None)
    cb_token = rx.context.get("token", "null")
    # Accept a 'note' as a URL parameter
    # TODO - urldecode the contents of 'note'
    cb_note = rx.context.get("note", "Event had no JSON payload")
    # NOTE - contents of cb_data will be overridden in create, event, aloejob
    cb_data = {"note": cb_note}
    # Accept 'status', the Aloe-centric name for job.state
    # as well as 'state'
    cb_agave_status = rx.context.get("status", rx.context.get("state", None))

    # Prepare template PipelineJobsEvent
    event_dict = {
        "uuid": cb_job_uuid,
        "name": cb_event_name,
        "token": cb_token,
        "data": cb_data,
    }

    # This is the default message schema 'event'
    if action == "event":
        # Filter message and override values in event_dict with its contents
        for k in ["uuid", "name", "token", "data"]:
            event_dict[k] = m.get(k, event_dict.get(k))

    # AgaveJobs can update the status of an existing job but cannot
    # create one. To do so, an Agave job must be launched
    # using the PipelineJobsAgaveProxy resource.
    if action == "agavejobs":
        rx.on_failure("Agave job callbacks are no longer supported")
    elif action == "aloejobs":
        try:
            # Aloe jobs POST their current JSON representation to
            # callback URL targets. The POST body contains a 'status' key.
            # If for some reason it doesn't, job status is determined by
            # the 'state' or 'status' URL parameter.
            if cb_agave_status is None:
                cb_agave_status = m.get("status", None)
            # Agave job message bodies include 'id' which is the jobId
            mes_agave_job_id = m.get("id", None)
            rx.logger.debug("aloe_status: {}".format(cb_agave_status))
            if cb_agave_status is not None:
                cb_agave_status = cb_agave_status.upper()
        except Exception as exc:
            rx.on_failure(
                "Aloe callback POST and associated URL parameters were missing some required fields",
                exc,
            )

        # If the job status is 'RUNNING' then use a subset of the POST for
        # event.data. Otherwise, create an event.data from the most recent
        # entry in the Agave job history. One small detail to note is that
        # callbacks are sent at the beginning of event processing in the
        # Agave jobs service and so a handful of fields in the job record
        # that are late bound are not yet populated when the event is sent.
        if cb_agave_status == "RUNNING":
            cb_data = minify_job_dict(dict(m))
        else:
            cb_data = {"status": cb_agave_status}
            # Fetch latest history entry to put in event.data
            try:
                # Is there a better way than grabbing entire history that can
                # be implemented in a pure Agave call? Alternatively, we could
                # cache last offset for this job in rx.state but that will
                # limit our scaling to one worker
                #
                agave_job_latest_history = rx.client.jobs.getHistory(
                    jobId=mes_agave_job_id,
                    limit=100)[-1].get("description", None)
                if agave_job_latest_history is not None:
                    cb_data["description"] = agave_job_latest_history
            except Exception as agexc:
                rx.logger.warning("Failed to get history for {}: {}".format(
                    mes_agave_job_id, agexc))

        # Map the Agave job status to an PipelineJobsEvent name
        if cb_event_name is None and cb_agave_status is not None:
            cb_event_name = AgaveEvents.agavejobs.get(cb_agave_status,
                                                      "update")
            rx.logger.debug("Status: {} => Event: {}".format(
                cb_agave_status, cb_event_name))

        # Event name and data can be updated as part of processing an Agave POST
        # so apply the current values to event_dict here
        event_dict["name"] = cb_event_name
        event_dict["data"] = cb_data

    # Sanity check event_dict and token
    if event_dict["uuid"] is None or event_dict[
            "name"] is None or cb_token is None:
        rx.on_failure("No actionable event was received.")

    # Instantiate a job instance to leverage the MPJ framework
    store = ManagedPipelineJobInstance(rx.settings.mongodb,
                                       event_dict["uuid"],
                                       agave=rx.client)

    # Handle event...
    try:
        # First, proxy events. This code forwards index and indexed events
        # to the jobs-indexer
        # Proxy 'index'
        if event_dict["name"] == "index":
            rx.logger.info("Forwarding 'index'")
            index_mes = {
                "name": "index",
                "uuid": event_dict["uuid"],
                "token": event_dict["token"],
            }
            rx.send_message(rx.settings.pipelines.job_indexer_id,
                            index_mes,
                            retryMaxAttempts=10)
            # Disable this since it should be picked up via events-manager subscription
            # message_control_annotator(up_job, ["INDEXING"], rx)
        # Proxy 'indexed'
        elif event_dict["name"] == "indexed":
            rx.logger.info("Forwarding 'indexed'")
            index_mes = {
                "name": "indexed",
                "uuid": event_dict["uuid"],
                "token": event_dict["token"],
            }
            rx.send_message(rx.settings.pipelines.job_indexer_id,
                            index_mes,
                            retryMaxAttempts=10)
            # Disable this since it should be picked up via events-manager subscription
            # message_control_annotator(up_job, ["FINISHED"], rx)
        # Handle all other events
        else:
            rx.logger.info("Handling '{}'".format(event_dict["name"]))
            # Get the current state of the MPJ. We use this to detect if
            # handling the event has resulted in a change of state
            store_state = store.state
            last_event = store.last_event
            # Send event at the beginning of state change so subscribers can pick
            # up, for instance, a case where the job receives an index event and
            # is in the FINISHED state.
            if rx.settings.state_enter:
                forward_event(event_dict["uuid"], event_dict['name'],
                              store_state, {'last_event': last_event}, rx)
            up_job = store.handle(event_dict, cb_token)
            if rx.settings.state_exit:
                forward_event(up_job["uuid"], event_dict['name'],
                              up_job["state"],
                              {"last_event": up_job["last_event"]}, rx)
    except Exception as exc:
        rx.on_failure("Event not processed", exc)

    rx.on_success("Processed event in {} usec".format(rx.elapsed()))
def main():
    """Index (or finalize indexing of) a ManagedPipelineJob.

    Accepts 'index' and 'indexed' schema messages, or bare URL parameters
    ('urlparams'). 'indexed' finalizes a job's indexing state; 'index' runs
    the indexer and then messages the jobs-manager with an 'indexed' event.
    """
    rx = Reactor()
    mes = AttrDict(rx.context.message_dict)
    rx.logger.info('raw_message: {}'.format(rx.context.raw_message))
    # Fallback: some senders deliver raw JSON rather than a parsed message_dict
    if mes == {}:
        try:
            jsonmsg = json.loads(rx.context.raw_message)
            mes = jsonmsg
        except Exception:
            pass

    # ['event', 'agavejobs', 'index', 'indexed']
    # Default action when no schema matches: rely on URL parameters alone
    action = "urlparams"
    try:
        for a in ["index", "indexed"]:
            try:
                rx.logger.debug("Checking against schema {}".format(a))
                rx.validate_message(mes,
                                    messageschema="/schemas/" + a +
                                    ".jsonschema",
                                    permissive=False)
                action = a
                break
            except Exception as exc:
                print("Validation error: {}".format(exc))
        if action is None:
            pprint(mes)
            raise ValidationError("Unknown schema")
    except Exception as vexc:
        rx.on_failure("Message was not processed", vexc)

    rx.logger.debug("Schema: {}".format(action))

    # (message key, cb key, default) — resolved from message, then Abaco
    # context, then process environment
    PARAMS = [
        ("uuid", "uuid", None),
        ("token", "token", None),
        ("level", "level", "1"),
        ("filters", "filters", None),
    ]

    # Look in the message, then in context, then in environment for values
    cb = dict()
    try:
        for param, key, default in PARAMS:
            cb[key] = mes.get(
                param, rx.context.get(param, os.environ.get(param, default)))
            rx.logger.debug("param:{}={}".format(param, cb[key]))
    except Exception as exc:
        # FIX: on_failure was previously called with only the exception and
        # no message, unlike every other call site in this handler
        rx.on_failure("Failed to resolve callback parameters", exc)

    # Transform JSON string representation of filters so they can be used
    # as Python regex. This is enough for filters passed from message but
    # not a URL parameter.
    # TODO implement urldecode on ?filters parameter

    rx.logger.info('Processing event {0} for {1}'.format(action, cb['uuid']))

    # Simple case - we're just processing 'indexed'
    if action == "indexed":
        rx.logger.info('Indexed job {}'.format(cb['uuid']))
        try:
            store = ManagedPipelineJobInstance(rx.settings.mongodb,
                                               uuid=cb['uuid'],
                                               agave=rx.client)
            store_state = store.state
            last_event = store.last_event
            # notify events manager that we are planning to process an 'indexed' event
            if rx.settings.state_enter:
                forward_event(cb['uuid'], 'indexed', store_state,
                              {"last_event": last_event}, rx)
            # This is where the actual indexed event is handled
            # (Job.state is updated and history amended)
            resp = store.indexed(token=cb["token"])
            # notify events manager that we processed an 'indexed' event
            if rx.settings.state_exit:
                forward_event(cb['uuid'], 'indexed', resp['state'],
                              {"last_event": resp['last_event']}, rx)
            rx.on_success('Processed indexed event for {0}'.format(cb['uuid']))
        except Exception as mexc:
            # FIX: message previously contained a literal, never-formatted
            # '{}' placeholder; pass the exception as the second argument
            rx.on_failure('Failed to handle indexed event', mexc)
    # FIX: 'elif' prevents the trailing 'else' from firing on_failure after
    # an 'indexed' event has already been handled above
    elif action in ["index", "urlparams"]:
        rx.logger.info('Indexing job {}'.format(cb['uuid']))
        try:
            store = ManagedPipelineJobInstance(rx.settings.mongodb,
                                               agave=rx.client,
                                               uuid=cb['uuid'])
            # TODO - Pass in generated_by=config#pipelines.process_uuid
            # notify events manager that we got an 'index' event
            store_state = store.state
            last_event = store.last_event
            if rx.settings.state_enter:
                forward_event(cb['uuid'], 'index', 'INDEXING',
                              {"last_event": last_event}, rx)
            resp = store.index(
                token=cb["token"],
                transition=False,
                filters=cb["filters"],
                generated_by=[rx.settings.pipelines.process_uuid],
            )
            if rx.settings.state_exit:
                # because the index handler returns a list, we have to query
                # the job in order to know its state and last event
                updated_store = ManagedPipelineJobInstance(
                    rx.settings.mongodb, agave=rx.client, uuid=cb['uuid'])
                forward_event(cb['uuid'], 'index', updated_store.state,
                              {"last_event": updated_store.last_event}, rx)

            if isinstance(resp, list):
                rx.logger.info(
                    "Indexed {} files to PipelineJob {}. ({} usec)".format(
                        len(resp), cb["uuid"], rx.elapsed()))
                # Send 'indexed' event to job via PipelineJobsManager (not
                # PipelineJobsIndexer!) This results in two messages required
                # to move a job to FINISHED from INDEXING but allows the
                # jobs-manager to subscribe to and act on the indexed event
                try:
                    if rx.settings['standalone'] is True:
                        job_manager_id = rx.uid
                        mgr_mes = {'uuid': cb['uuid'], 'name': 'indexed'}
                    else:
                        job_manager_id = rx.settings.pipelines.job_manager_id
                        mgr_mes = {
                            'uuid': cb['uuid'],
                            'name': 'indexed',
                            'data': {
                                'source': 'jobs-manager.prod'
                            }
                        }
                    rx.send_message(job_manager_id,
                                    mgr_mes,
                                    retryMaxAttempts=10)
                    rx.on_success('Sent indexed event for {0}'.format(
                        cb['uuid']))
                except Exception as mexc:
                    rx.on_failure('Failed to send indexed event', mexc)
            else:
                rx.logger.info("Indexed and transitioned to {0}".format(
                    resp.get("state", "Unknown")))
        except Exception as iexc:
            rx.on_failure("Failed to accomplish indexing", iexc)
    else:
        rx.on_failure("Failed to interpret indexing request")
def main():
    """Sync an S3 object (or tree) to its Tapis/POSIX destination.

    Minimal Message Body:
      { "uri": "s3://uploads/path/to/target.txt"}

    A file URI is copied (subject to 'sync' comparison) and routed onward; a
    directory URI is listed recursively and one self-message per file is sent.
    """
    r = Reactor()
    m = AttrDict(r.context.message_dict)
    # ! This code fixes an edge case and will be moved lower in the stack
    if m == {}:
        try:
            jsonmsg = json.loads(r.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    # Use JSONschema-based message validator
    if not r.validate_message(m):
        r.on_failure('Message was invalid', None)

    # Rename m.Key so it makes semantic sense elsewhere in the code
    s3_uri = m.get('uri')
    if s3_uri.endswith('/'):
        s3_uri = s3_uri[:-1]
    only_sync = m.get('sync', True)
    generated_by = m.get('generated_by', [])
    r.logger.info('Received S3 URI {}'.format(s3_uri))

    sh = S3Helper()
    ah = AgaveHelper(r.client)

    # Map POSIX source and destination
    s3_bucket, srcpath, srcfile = sh.from_s3_uri(s3_uri)
    s3_full_relpath = os.path.join(s3_bucket, srcpath, srcfile)
    if r.settings.safen_paths:
        # Munge out unicode characters on upload. Default for safen_path
        # also transforms spaces into hyphen character
        ag_full_relpath = safen_path(s3_full_relpath,
                                     no_unicode=True,
                                     no_spaces=True)
        if ag_full_relpath != s3_full_relpath:
            r.logger.warning('Safened path: {} => {}'.format(
                s3_full_relpath, ag_full_relpath))
    else:
        ag_full_relpath = s3_full_relpath
    ag_uri = 'agave://data-sd2e-community/' + ag_full_relpath
    r.logger.info('Generated Tapis resource: {}'.format(ag_uri))

    posix_src = sh.mapped_catalog_path(s3_full_relpath)
    posix_dest = ah.mapped_posix_path(os.path.join('/', ag_full_relpath))
    r.logger.debug('POSIX src: {}'.format(posix_src))
    r.logger.debug('POSIX dst: {}'.format(posix_dest))

    def cmpfiles(posix_src, posix_dest, mtime=True, size=True, cksum=False):
        """Return True if src and dest appear to be the same file contents.

        Compares existence, then (optionally) mtime and size via os.stat.
        Checksum comparison is not implemented and raises if requested.
        """
        # Existence
        if not os.path.exists(posix_dest):
            return False
        if not os.path.exists(posix_src):
            return False
        # Both files exist, so read in POSIX stat
        stat_src = os.stat(posix_src)
        stat_dest = os.stat(posix_dest)
        # Modification time (conditional)
        if mtime:
            # Mtime on source should never be more recent than
            # destination, as destination is a result of a copy
            # operation. We might need to add ability to account
            # for clock skew but at present we assume source and
            # destination filesystems are managed by the same host
            if stat_src.st_mtime > stat_dest.st_mtime:
                return False
        # Size (conditional)
        if size:
            if stat_src.st_size != stat_dest.st_size:
                return False
        if cksum:
            # Not implemented
            # TODO Implement very fast hasher instead of sha256 for sync
            # 1. https://github.com/kalafut/py-imohash
            # 2. https://pypi.org/project/xxhash/
            raise NotImplementedError(
                'Checksum comparison is not yet implemented')
        # None of the False tests returned so we can safely return True
        return True

    to_process = list()
    # Is the source physically a FILE?
    if sh.isfile(posix_src):
        # If in sync mode, check if source and destination differ
        if only_sync is True and cmpfiles(
                posix_src, posix_dest, mtime=False):
            r.logger.debug('Compared: src == dest {}, {}'.format(
                posix_src, posix_dest))
        else:
            # Not in sync mode - force overwrite destination with source
            r.logger.debug('Compared: src != dest {}, {}'.format(
                posix_src, posix_dest))
            copyfile(r, posix_src, posix_dest, ag_uri)
            routemsg(r, ag_uri)
    elif sh.isdir(posix_src):
        # It's a directory. Recurse through it and launch file messages to self
        r.logger.debug('Directory found: {}'.format(posix_src))
        to_process = sh.listdir(posix_src,
                                recurse=True,
                                bucket=s3_bucket,
                                directories=False)
        pprint(to_process)
        r.logger.info('Sync tasks found: {}'.format(len(to_process)))

        # List to_process is constructed in POSIX ls order. Adding a shuffle
        # spreads the processing evenly over all files
        shuffle(to_process)
        batch_sub = 0
        for procpath in to_process:
            try:
                r.logger.debug('Processing {}'.format(procpath))
                # Here is the meat of the directory syncing behavior
                posix_src = sh.mapped_catalog_path(procpath)
                posix_dest = ah.mapped_posix_path(os.path.join('/', procpath))
                if (only_sync is False
                        or cmpfiles(posix_src, posix_dest, mtime=False)
                        is False):
                    r.logger.info('Copying {}'.format(procpath))
                    actor_id = r.uid
                    resp = dict()
                    s3_msg_uri = 's3://' + procpath
                    message = {
                        'uri': s3_msg_uri,
                        'generated_by': generated_by,
                        'sync': only_sync
                    }
                    if r.local is False:
                        # NOTE: the original wrapped this in a redundant
                        # try/except that only re-raised; removed
                        r.logger.debug(
                            'Messaging {} with copy request'.format(actor_id))
                        resp = r.send_message(actor_id,
                                              message,
                                              retryMaxAttempts=3,
                                              ignoreErrors=False)
                        if 'executionId' in resp:
                            r.logger.info('Message response: {}'.format(
                                resp['executionId']))
                        else:
                            raise AgaveError('Message failed')
                    else:
                        r.logger.debug(message)
                    batch_sub += 1
                    # Always sleep a little bit between task submissions
                    sleep(random() * r.settings.batch.task_sleep_duration)
                    # Sleep a little longer every N submissions
                    if batch_sub > r.settings.batch.size:
                        batch_sub = 0
                        if r.settings.batch.randomize_sleep:
                            sleep(random() * r.settings.batch.sleep_duration)
                        else:
                            sleep(r.settings.batch.sleep_duration)
                else:
                    r.logger.debug(
                        'Copy not required for {}'.format(procpath))
            except Exception as exc:
                # FIX: report the path that actually failed (procpath);
                # previously this logged the parent sync target instead
                r.logger.error('Copy operation failed for {}: {}'.format(
                    procpath, exc))
    else:
        r.on_failure('Process failed and {} was not synced'.format(posix_src))