def track_sample_state_on_stopped_job(detail, event):
    """
    persists all the definitions of a sample and its final state. If the state is not
    a finished state, the sample is automatically set to failed
    """
    overrides = detail['overrides']
    container = detail['containers'][0]
    env = overrides['containerOverrides'][0]['environment']

    data = {}
    for x in env:
        name = x['name']
        value = x['value']
        data[name] = value

    data['start_time'] = detail['startedAt']
    data['end_time'] = detail['stoppedAt']

    # compute the state the sample is actually in; if it is not an acceptable
    # finish state, mark the sample as failed to avoid stalling downstream work
    if "CARROT_SAMPLE" in data:
        data['sample'] = data['CARROT_SAMPLE']
        data['id'] = event['id']
        data['timestamp'] = time.time()

        sample = get_file_by_handle(data['CARROT_SAMPLE'])
        state = get_tracked_state(sample)

        if state in ['uploaded', 'failed']:
            # all good
            logger.info(
                f"task finished in an acceptable state. {sample} with {state}")
        else:
            data['observed_state'] = state
            logger.warning(
                f"task finished with state {state}, which would stall operations. "
                f"Manually setting {sample} to failed to avoid stalling")
            save_sample_state(
                sample, "failed", data['CARROT_SAMPLE'],
                f"the final state for this file was '{state}', which was not sufficient "
                f"and would stall the computations", event)
            insert_stall_event(data)
    else:
        logger.info(
            f"was not a sample based task, moving on. {json.dumps(data, indent=4)}")
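
# Illustrative only: a minimal sketch of the ECS "task stopped" payload that
# track_sample_state_on_stopped_job reads, reconstructed from the field accesses
# above. All concrete values (sample name, timestamps, event id) are hypothetical.
_example_stopped_task_detail = {
    'overrides': {
        'containerOverrides': [{
            'environment': [
                {'name': 'CARROT_SAMPLE', 'value': 'some_sample'},
                {'name': 'CARROT_METHOD', 'value': 'some_method'},
            ]
        }]
    },
    'containers': [{}],
    'startedAt': 1577836800000,
    'stoppedAt': 1577840400000,
}
_example_stopped_task_event = {'id': 'example-event-id'}

# example invocation (kept commented out so nothing runs at import time):
# track_sample_state_on_stopped_job(_example_stopped_task_detail, _example_stopped_task_event)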
def triggerEvent(data):
    """
    submits the given data to the table (previously a queue)

    :param data: requires sample and status in it to be considered valid
    :return: a serialized version of the submitted message
    """
    try:
        validate(data, __TRACKING_SCHEMA__)
    except ValidationError as e:
        return {
            'body': json.dumps({
                'error': str(e),
                'content': data
            }),
            'statusCode': 500,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }

    item = save_sample_state(sample=data['sample'],
                             state=data['status'],
                             fileHandle=data.get('fileHandle', None),
                             reason=data.get('reason', data.get('failed', None)),
                             optional=data.get('optional', None))[0]

    return {
        'body': json.dumps(item, use_decimal=True),
        'statusCode': 200,
        'isBase64Encoded': False,
        'headers': __HTTP_HEADERS__
    }
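
# Illustrative only: a minimal payload for the tracking triggerEvent above, assuming
# __TRACKING_SCHEMA__ requires at least 'sample' and 'status'. The optional keys
# mirror the .get() lookups in the function; all values are hypothetical.
_example_tracking_payload = {
    'sample': 'some_sample',
    'status': 'exported',
    'fileHandle': 'some_sample.mzml',  # optional
    'reason': 'example reason',        # optional
}

# example invocation (commented out so nothing runs at import time):
# response = triggerEvent(_example_tracking_payload)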
def triggerEvent(data):
    """
    submits the given data to the queue

    :param data: requires sample
    :return: a serialized version of the submitted message
    """
    logger.info("trigger event: " + json.dumps(data))
    saved = {}
    try:
        validate(data, __ACQUISITION_SCHEMA__)

        timestamp = int(time.time() * 1000)
        data['time'] = timestamp
        data['id'] = data['sample']

        # put the item in the table instead of queueing it
        tm = TableManager()
        saved = store_acquisition_data(data, tm)

        tracked = save_sample_state(sample=data['sample'],
                                    state='entered',
                                    fileHandle=None,
                                    reason="new acquisition entered",
                                    optional=None)

        logger.info("added tracking for acquisition data")
    except ValidationException as vex:
        traceback.print_exc()
        data = str(vex.body)
        # ensure the response metadata exists even when the item was never stored
        saved.setdefault('ResponseMetadata', {})['HTTPStatusCode'] = 400
    except Exception as ex:
        traceback.print_exc()
        data = str(ex)
        saved.setdefault('ResponseMetadata', {})['HTTPStatusCode'] = 500

    return {
        'body': json.dumps(data),
        'statusCode': saved['ResponseMetadata']['HTTPStatusCode'],
        'isBase64Encoded': False,
        'headers': __HTTP_HEADERS__
    }
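
# Illustrative only: a sketch of an acquisition payload for the triggerEvent above.
# Only 'sample' is known to be required from the docstring; the nested 'acquisition'
# and 'processing' blocks follow the default document built in store_sample_for_job
# and are assumptions here, as are all concrete values.
_example_acquisition_payload = {
    'sample': 'some_sample',
    'experiment': 'some_experiment',
    'acquisition': {'instrument': 'unknown', 'ionisation': 'unknown', 'method': 'unknown'},
    'processing': {'method': 'unknown'},
}

# example invocation (commented out so nothing runs at import time):
# response = triggerEvent(_example_acquisition_payload)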
def bucket_json(event, context):
    """
    handles json trigger events

    :param event:
    :param context:
    :return:
    """
    logger.info(f"bucket json got trigger: {event}")

    if len(event['Records']) == 0:
        logger.info("no records found!")
        return

    for record in event['Records']:
        logger.info(f"received record: {record}")
        o = record['s3']['object']
        k = str(o['key'])

        logger.info("received key {}".format(k))
        sample = get_file_by_handle(k)
        logger.info("sample was uploaded: {}".format(sample))

        result = save_sample_state(
            sample=sample,
            state=UPLOADED,
            fileHandle=k,
            reason="processed file was uploaded to the bucket")

        if result is None:
            logger.info(
                "we were not able to update the sample: {}".format(sample))
        else:
            logger.info("sample state was set to: {}".format(result))

        jobs = load_jobs_for_sample(sample)

        if jobs is not None:
            logger.info("found {} associated jobs for this sample".format(
                len(jobs)))
            for job in jobs:
                from stasis.jobs.sync import sync_job
                sync_job(job=job)
        else:
            logger.info("we did not find a job for this sample!")
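
# Illustrative only: a minimal S3 notification event for bucket_json, containing the
# single 'Records[].s3.object.key' path the handler reads (the same shape the zip
# bucket test below uses). The key value is hypothetical.
_example_bucket_json_event = {
    'Records': [
        {'s3': {'object': {'key': 'some_sample.json'}}}
    ]
}

# example invocation (commented out so nothing runs at import time):
# bucket_json(_example_bucket_json_event, {})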
def schedule_processing_to_fargate(event, context):
    """
    submits a new task to the cluster - a fargate task will run it

    :param event:
    :param context:
    :return:
    """
    body = json.loads(event['body'])

    try:
        validate(body, __SCHEDULE__)

        import boto3

        overrides = {
            "containerOverrides": [{
                "name": "carrot-runner",
                "environment": [
                    {
                        "name": "SPRING_PROFILES_ACTIVE",
                        # the aws profile needs to be active for this system to
                        # connect to the AWS database
                        "value": "{}{},{}".format('aws', os.getenv('current_stage'),
                                                  body["profile"])
                    },
                    {
                        "name": "CARROT_SAMPLE",
                        "value": "{}".format(body['sample'])
                    },
                    {
                        "name": "CARROT_METHOD",
                        "value": "{}".format(body['method'])
                    },
                    {
                        "name": "CARROT_MODE",
                        "value": "{}".format(body['profile'])
                    },
                ]
            }]
        }

        task_name = "{}-{}".format(os.getenv("current_stage"), SECURE_CARROT_RUNNER)

        if 'key' in body and body['key'] is not None:
            overrides['containerOverrides'][0]['environment'].append({
                "name": "STASIS_KEY",
                "value": body['key']
            })

        send_to_fargate(overrides=overrides, task_name=task_name)

        save_sample_state(sample=body['sample'],
                          state='scheduled',
                          reason='discovered by fargate runner',
                          fileHandle=None)

        return {
            'statusCode': 200,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    except ValidationError as e:
        logger.info("validation error")
        logger.info(body)
        traceback.print_exc()

        return {
            'body': json.dumps(str(e)),
            'statusCode': 503,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    except Exception as e:
        logger.info(body)
        traceback.print_exc()

        create(
            {
                "body": json.dumps({
                    'sample': body['sample'],
                    'status': FAILED,
                    'reason': str(e)
                })
            }, {})

        return {
            'body': json.dumps(str(e)),
            'statusCode': 503,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
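
# Illustrative only: a sketch of the request body schedule_processing_to_fargate
# validates against __SCHEDULE__, based on the keys it reads ('sample', 'method',
# 'profile' and the optional 'key'). All concrete values are hypothetical.
_example_schedule_event = {
    'body': json.dumps({
        'sample': 'some_sample',
        'method': 'some_method',
        'profile': 'some_profile',
        'key': None,  # optional STASIS_KEY; omitted from the overrides when None
    })
}

# example invocation (commented out so nothing runs at import time):
# schedule_processing_to_fargate(_example_schedule_event, {})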
def test_calculate_job_state_with_zip_upload(requireMocking, mocked_10_sample_job):
    # set 1 sample state to exported
    save_sample_state(sample="abc_0", state=EXPORTED)

    # this should update the job state to processing
    sync_job(get_job_config("12345"))

    state = get_job_state("12345")
    assert state == PROCESSING

    # set a sample state to failed
    save_sample_state(sample="abc_1", state=FAILED)
    sync_job(get_job_config("12345"))

    # this should keep the job state in processing
    state = get_job_state("12345")
    assert state == PROCESSING

    # set all other samples to exported
    save_sample_state(sample="abc_2", state=EXPORTED)
    save_sample_state(sample="abc_3", state=EXPORTED)
    save_sample_state(sample="abc_4", state=EXPORTED)
    save_sample_state(sample="abc_5", state=EXPORTED)
    save_sample_state(sample="abc_6", state=EXPORTED)
    save_sample_state(sample="abc_7", state=EXPORTED)
    save_sample_state(sample="abc_8", state=EXPORTED)
    save_sample_state(sample="abc_9", state=EXPORTED)

    sync_job(get_job_config("12345"))

    # this should set the job state to aggregation scheduling now
    state = get_job_state("12345")
    assert state == AGGREGATING_SCHEDULED

    # trigger an upload to the zip bucket
    bucket_zip({'Records': [{'s3': {'object': {'key': '12345.zip'}}}]}, {})

    # job should now be aggregated
    state = get_job_state("12345")
    assert state == AGGREGATED_AND_UPLOADED
def test_calculate_job_state_2(requireMocking, mocked_10_sample_job):
    result = schedule_job({'pathParameters': {"job": "12345"}}, {})
    watch_job_schedule_queue()

    state = get_job_state("12345")
    assert state == SCHEDULED

    # set 1 sample state to failed
    save_sample_state(sample="abc_0", state=FAILED)

    # this should update the job state to processing
    sync_job(get_job_config("12345"))
    state = get_job_state("12345")
    assert state == PROCESSING

    # set another sample state to failed
    save_sample_state(sample="abc_1", state=FAILED)

    # this should keep the job state in processing
    sync_job(get_job_config("12345"))
    state = get_job_state("12345")
    assert state == PROCESSING

    # set all other samples to exported
    save_sample_state(sample="abc_2", state=EXPORTED)
    save_sample_state(sample="abc_3", state=EXPORTED)
    save_sample_state(sample="abc_4", state=EXPORTED)
    save_sample_state(sample="abc_5", state=EXPORTED)
    save_sample_state(sample="abc_6", state=EXPORTED)
    save_sample_state(sample="abc_7", state=EXPORTED)
    save_sample_state(sample="abc_8", state=EXPORTED)
    save_sample_state(sample="abc_9", state=EXPORTED)

    sync_job(get_job_config("12345"))

    # this should set the job state to aggregation scheduling now
    state = get_job_state("12345")
    assert state == AGGREGATING_SCHEDULED
def store_sample_for_job(event, context):
    """
    stores an associated sample for a job

    :param event:
    :param context:
    :return:
    """
    body = json.loads(event['body'])

    try:
        validate(body, __SAMPLE_JOB_SCHEMA__)
    except Exception as e:
        logger.info(f"received body was considered invalid: {body}")
        traceback.print_exc()
        return {
            'body': json.dumps({
                'state': str(FAILED),
                'reason': str(e)
            }),
            'statusCode': 503,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }

    tracking = body.get('meta', {}).get('tracking', [])
    meta = body.get('meta', {})
    meta.pop('tracking', None)
    sample = body.get('sample')
    job = body.get("job")

    try:
        # overwrite the tracking states, including the extension if one is provided
        for track in tracking:
            if 'extension' in track:
                fileHandle = "{}.{}".format(sample, track['extension'])
            else:
                fileHandle = None

            save_sample_state(sample=sample,
                              state=track['state'],
                              fileHandle=fileHandle)

        if len(meta) > 0:
            # update the class here by updating the acquisition part
            # 1. get acquisition data
            # 2. set new metadata
            tm = TableManager()
            acqtable = tm.get_acquisition_table()

            result = acqtable.query(
                KeyConditionExpression=Key('id').eq(sample))

            if "Items" in result and len(result['Items']) > 0:
                data = result['Items'][0]
            else:
                timestamp = int(time.time() * 1000)
                data = {
                    'time': timestamp,
                    'id': sample,
                    'sample': sample,
                    'experiment': _fetch_experiment(sample),
                    'acquisition': {
                        'instrument': 'unknown',
                        'ionisation': 'unknown',
                        'method': 'unknown'
                    },
                    'processing': {
                        'method': 'unknown'
                    },
                }

            try:
                data = Merge().data_merge(data, meta)
                store_acquisition_data(data, tm)
            except Exception as e:
                logger.info(f"generated data was considered invalid: {data}")
                traceback.print_exc()
                return {
                    'body': json.dumps({
                        'state': str(FAILED),
                        'reason': str(traceback.format_exc())
                    }),
                    'statusCode': 503,
                    'isBase64Encoded': False,
                    'headers': __HTTP_HEADERS__
                }

        set_sample_job_state(job=job, sample=sample, state=SCHEDULING)

        return {
            'body': json.dumps({
                'state': str(SCHEDULING),
                'job': job,
                'sample': sample,
                'reason': 'sample was submitted'
            }),
            'statusCode': 200,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    except Exception as e:
        # update the job state in the system to failed with the related reason
        error_diagnostics = traceback.format_exc()
        set_sample_job_state(job=job,
                             sample=sample,
                             state=FAILED,
                             reason=f"{str(e)} = {error_diagnostics}")
        traceback.print_exc()
        return {
            'body': json.dumps({
                'state': str(FAILED),
                'job': job,
                'sample': sample,
                'reason': str(e)
            }),
            'statusCode': 500,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
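
# Illustrative only: a sketch of a request body for store_sample_for_job, assuming
# __SAMPLE_JOB_SCHEMA__ covers the keys read above ('sample', 'job' and an optional
# 'meta' block containing 'tracking'). State names, the extension and the acquisition
# metadata are hypothetical values.
_example_store_sample_event = {
    'body': json.dumps({
        'sample': 'abc_0',
        'job': '12345',
        'meta': {
            'tracking': [
                {'state': 'entered'},
                {'state': 'acquired', 'extension': 'mzml'},
            ],
            'acquisition': {'instrument': 'some_instrument'},
        }
    })
}

# example invocation (commented out so nothing runs at import time):
# store_sample_for_job(_example_store_sample_event, {})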