def list_all_jobs(event, context):
    """Return every job stored in the job-state table.

    Walks the full table scan, following DynamoDB pagination until no
    continuation key remains, and answers with the aggregated job list.
    """
    manager = TableManager()
    state_table = manager.get_job_state_table()
    try:
        page = state_table.scan()
        collected = page['Items']
        # scans are paged; keep fetching while a continuation key exists
        while 'LastEvaluatedKey' in page:
            page = state_table.scan(ExclusiveStartKey=page['LastEvaluatedKey'])
            collected.extend(page['Items'])
        status, payload = 200, {'jobs': collected, 'error': ''}
    except Exception as ex:
        status, payload = 500, {'jobs': [], 'error': str(ex)}
    return {
        'statusCode': status,
        'headers': __HTTP_HEADERS__,
        'body': json.dumps(payload)
    }
def test_delete(requireMocking):
    """End-to-end: a stored sample can be fetched, deleted, then is gone."""
    # seed a record we can delete afterwards
    manager = TableManager()
    tracking_table = manager.get_tracking_table()
    tracking_table.put_item(Item={
        "id": "test-to-delete",
        "experiment": "unknown",
        "sample": "test-to-delete",
        "status": [{
            "time": 1524772162698,
            "value": "PROCESSING"
        }]
    })

    # verify the seeded record is visible
    fetched = get.get({"pathParameters": {"sample": "test-to-delete"}}, {})
    assert json.loads(fetched['body'])["id"] == "test-to-delete"

    # remove it again
    removed = delete.delete({"pathParameters": {
        "sample": "test-to-delete"
    }}, {})
    assert removed['statusCode'] == 204

    # the record must no longer resolve
    assert get.get({"pathParameters": {
        "sample": "test-to-delete"
    }}, {})['statusCode'] == 404
def get(event, context):
    """ gets the information for the specific record in the dynamodb database """
    if 'pathParameters' not in event:
        # invalid!
        return {'statusCode': 503}

    params = event['pathParameters']
    if 'sample' not in params or 'job' not in params:
        # invalid!
        return {'statusCode': 503}

    manager = TableManager()
    # composite key derived from job + sample
    record_id = manager.generate_job_id(params['job'], params['sample'])
    state_table = manager.get_job_sample_state_table()

    result = state_table.query(
        KeyConditionExpression=Key('id').eq(record_id)
    )

    items = result.get('Items', [])
    if items:
        return {
            "statusCode": 200,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps(items[0])
        }
    return {
        "statusCode": 404,
        "headers": __HTTP_HEADERS__,
        "body": json.dumps({"error": "no sample found with this identifier : {}".format(
            event['pathParameters']['sample'])})
    }
def delete(events, context):
    """Delete the specific tracked sample from the storage.

    Responds 204 on success, 404 when the sample (or path) is missing,
    and 422 when no sample name was provided.
    """
    logger.info("received event: " + json.dumps(events, indent=2))

    if 'pathParameters' not in events:
        return {
            'statusCode': 404,
            'headers': __HTTP_HEADERS__,
            'isBase64Encoded': False,
            'body': json.dumps({
                "error": "not supported, need's be called from a http event!"
            })
        }

    if 'sample' not in events['pathParameters']:
        return {
            'statusCode': 422,
            'headers': __HTTP_HEADERS__,
            'isBase64Encoded': False,
            'body': json.dumps({"error": "sample name is not provided!"})
        }

    tm = TableManager()
    table = tm.get_tracking_table()

    result = table.query(KeyConditionExpression=Key('id').eq(
        events['pathParameters']['sample']))
    logger.info("found result to delete: %s" % result)

    if 'Items' in result and len(result['Items']) > 0:
        # round-trip through JSON to normalize Decimal values
        result = json.dumps(result['Items'][0], use_decimal=True)
        result = json.loads(result, use_decimal=True)

        # BUGFIX: the range key must be the stored sample value, not the id.
        # They are conventionally identical in this system, but using the
        # id for both keys silently relied on that coincidence.
        table.delete_item(Key={
            'id': result['id'],
            'sample': result['sample']
        })

        # create a response when sample is found
        return {
            'statusCode': 204,
            'headers': __HTTP_HEADERS__,
            'isBase64Encoded': False,
            'body': ''
        }
    else:
        # create a response when sample is not found
        return {
            'statusCode': 404,
            'headers': __HTTP_HEADERS__,
            'isBase64Encoded': False,
            'body': json.dumps({"error": "sample not found"})
        }
def get_all(events, context):
    """returns paged list with the latest status for the samples in the given experiment"""
    if 'pathParameters' not in events:
        return {
            "statusCode": 404,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": "not supported, need's be called from a http event!"})
        }

    logger.info("events: {}".format(events))
    tm = TableManager()
    table = tm.get_acquisition_table()

    path = events['pathParameters']
    if path is not None and 'sample' in path and 'experiment' in path:
        # continuation request: resume the scan at the supplied key
        logger.info(
            f"Not the first page // {events['pathParameters']['sample']}// {events['pathParameters']['experiment']}")
        scan_args = {
            'ExclusiveStartKey': {
                "id": path['sample'],
                "experiment": path['experiment']
            },
            'Limit': 500
        }
    else:
        logger.info("First page")
        scan_args = {'Limit': 500}

    try:
        result = table.scan(**scan_args)
        body = {"items": result['Items']}
        # hand the continuation key back to the caller for the next page
        if 'LastEvaluatedKey' in result:
            body['last_item'] = result['LastEvaluatedKey']
        return {
            'statusCode': 200,
            'headers': __HTTP_HEADERS__,
            'body': json.dumps(body)
        }
    except Exception as ex:
        logger.info("QUERY-ERROR: %s" % str(ex))
        return {
            "statusCode": 418,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": ex.args})
        }
def test_get_with_reference(requireMocking):
    """A stored acquisition carrying minix references is returned intact."""
    # store data
    tm = TableManager()
    table = tm.get_acquisition_table()

    item = {
        'sample': '180415dZKsa20_1',
        'experiment': '12345',
        'acquisition': {
            'instrument': 'Leco GC-Tof',
            'name': 'GCTOF',
            'ionisation': 'positive',
            'method': 'gcms'
        },
        'metadata': {
            'class': '382172',
            'species': 'rat',
            'organ': 'tissue'
        },
        'userdata': {
            'label': 'GP_S_6_006',
            'comment': ''
        },
        'processing': {
            'method': 'gcms | test | test | positive'
        },
        'time': 1525121375499,
        'id': '180415dZKsa20_1',
        'references': [{
            'name': 'minix',
            'value': '12345'
        }]
    }

    try:
        validate(item, __ACQUISITION_SCHEMA__)
        table.put_item(Item=tm.sanitize_json_for_dynamo(item))
    except ValidationException as vex:
        result = None
        fail(str(vex.body))
    except ClientError as cer:
        result = None
        fail(str(cer.response))

    # process data
    result = get.get({"pathParameters": {"sample": "180415dZKsa20_1"}}, {})
    assert result['statusCode'] == 200

    payload = json.loads(result['body'])
    assert payload["id"] == "180415dZKsa20_1"
    assert payload["acquisition"]["instrument"] == "Leco GC-Tof"
    assert payload["references"][0]["name"] == "minix"
    assert payload["references"][0]["value"] == "12345"
def monitor_jobs(event, context):
    """Monitor the current jobs in the system.

    Queries the job state table for all jobs in state SCHEDULED; when none
    are found it falls back to scanning for every job NOT in that state and
    synchronizes each job that is still in flight.

    TODO looks like it's only used in tests and nowhere else
    """
    logger.info("job monitor triggered from event {}".format(event))

    # 1. query JOB state table in state running
    tm = TableManager()
    table = tm.get_job_state_table()

    query_params = {
        'IndexName': 'state-index',
        'Select': 'ALL_ATTRIBUTES',
        'KeyConditionExpression': Key('state').eq(SCHEDULED)
    }
    result = table.query(**query_params)

    if 'Items' in result:
        if len(result['Items']) == 0:
            logger.info("no jobs in state scheduled!")
            # nothing scheduled: fall back to scanning for every job that is
            # NOT in state SCHEDULED (expensive, flagged below)
            query_params = {
                'IndexName': 'state-index',
                'Select': 'ALL_ATTRIBUTES',
                'FilterExpression': Attr('state').ne(SCHEDULED)
            }
            logger.info(
                "WARNING: never good todo a able scan!!! find a better solution"
            )
            result = table.scan(**query_params)

        for x in result['Items']:
            try:
                # terminal / aggregation states need no further syncing
                if x['state'] in [
                    FAILED, AGGREGATED_AND_UPLOADED, AGGREGATING_SCHEDULED,
                    AGGREGATING_SCHEDULING
                ]:
                    continue
                sync_job(x)
            except Exception as e:
                # a failure while syncing marks the whole job as FAILED,
                # carrying the stack trace as the reason for diagnostics
                traceback.print_exc()
                error_diagnostics = traceback.format_exc()
                update_job_state(job=x['id'], state=FAILED,
                                 reason=f"{str(e)} = {error_diagnostics}")
def get(events, context):
    """returns the specific element from the storage"""
    if 'pathParameters' not in events:
        return {
            "statusCode": 404,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": "not supported, need's be called from a http event!"})
        }
    if 'sample' not in events['pathParameters']:
        return {
            "statusCode": 404,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": "sample name is not provided!"})
        }

    tm = TableManager()
    table = tm.get_acquisition_table()
    versions_table = tm.get_acquisition_table_version()

    # path parameters arrive url-encoded
    sample = urllib.parse.unquote(events['pathParameters']['sample'])
    logger.info("looking for sample: {} ( raw its {} )".format(sample, events['pathParameters']['sample']))

    result = table.query(
        KeyConditionExpression=Key('id').eq(sample)
    )
    versions = versions_table.query(
        KeyConditionExpression=Key('id').eq(sample)
    )

    if "Items" not in result or len(result['Items']) == 0:
        return {
            "statusCode": 404,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": "no sample found with this identifier : {}".format(
                events['pathParameters']['sample'])})
        }

    record = result['Items'][0]
    # attach any historical versions of this acquisition when present
    if "Items" in versions and len(versions['Items']) > 0:
        record['versions'] = versions['Items']

    return {
        "statusCode": 200,
        "headers": __HTTP_HEADERS__,
        "body": json.dumps(record)
    }
def test_get_experiment(requireMocking, sample_count):
    """Paging an experiment returns page_size items plus a last_item cursor."""
    tm = TableManager()
    table = tm.get_acquisition_table()

    try:
        # seed sample_count acquisitions for experiment "1"
        for idx in range(0, sample_count):
            table.put_item(Item=tm.sanitize_json_for_dynamo({
                "sample": f"test-{idx:06d}",
                "experiment": "1",
                "id": f"test-{idx:06d}",
                "acquisition": {
                    "instrument": "random",
                    "ionisation": "positive",
                    "method": "test-method"
                },
                "metadata": {
                    "class": f"{idx%100}",
                    "species": "rat",
                    "organ": "tissue"
                },
                "userdata": {
                    "label": "GP_S_6_006",
                    "comment": ""
                },
                "processing": {
                    "method": "test-method | random | test | positive"
                }
            }))
    except ValidationException as vex:
        result = None
        fail(str(vex.body))
    except ClientError as cer:
        result = None
        fail(str(cer.response))

    page_size = 3
    result = experiment.get(
        {'pathParameters': {
            'experiment': '1',
            'psize': page_size
        }}, {})
    data = json.loads(result['body'])

    assert result['statusCode'] == 200
    assert len(data['items']) == page_size
    # cursor points at the last item of the page (0-based index 2)
    assert data['last_item']['id'] == 'test-000002'
def triggerEvent(data):
    """
    submits the given data to the queue

    :param data: requires sample
    :return: a serialized version of the submitted message
    """
    validate(data, __RESULT_SCHEMA__)

    if 'sample' not in data:
        # BUGFIX: previously `data['id'] = data['sample']` executed BEFORE
        # this check, so a missing sample raised KeyError and this 400
        # branch was unreachable.
        return {
            'body': json.dumps({'error': 'no sample provided'}),
            'statusCode': 400,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }

    timestamp = int(time.time() * 1000)
    data['time'] = timestamp
    data['id'] = data['sample']

    table = Bucket(os.environ['resultTable'])

    # lookup from the stasis tables the correct file handle
    # TODO right now we are faking it
    name = get_file_handle(data['id'])

    existing = table.exists(name)
    if existing:
        existing = json.loads(table.load(name))
        # need to append and/or update result to injections
        data['injections'] = {
            **existing['injections'],
            **data['injections']
        }

    result = table.save(
        name, json.dumps(TableManager().sanitize_json_for_dynamo(data)))

    return {
        'body': json.dumps(data),
        'statusCode': result['ResponseMetadata']['HTTPStatusCode'],
        'isBase64Encoded': False,
        'headers': __HTTP_HEADERS__
    }
def triggerEvent(data):
    """
    submits the given data to the queue

    :param data: requires sample
    :return: a serialized version of the submitted message
    """
    logger.info("trigger event: " + json.dumps(data))
    saved = {}
    try:
        validate(data, __ACQUISITION_SCHEMA__)

        timestamp = int(time.time() * 1000)
        data['time'] = timestamp
        data['id'] = data['sample']

        # put item in table instead of queueing
        tm = TableManager()
        saved = store_acquisition_data(data, tm)
        tracked = save_sample_state(sample=data['sample'], state='entered',
                                    fileHandle=None,
                                    reason="new acquisition entered",
                                    optional=None)
        logger.info("added tracking for acquisition data")
    except ValidationException as vex:
        traceback.print_exc()
        data = str(vex.body)
        # BUGFIX: when validation fails `saved` is still {}, so indexing
        # saved['ResponseMetadata'] directly raised KeyError; setdefault
        # creates the nested dict on demand.
        saved.setdefault('ResponseMetadata', {})['HTTPStatusCode'] = 400
    except Exception as ex:
        traceback.print_exc()
        data = str(ex)
        # same guard as above for generic failures before `saved` is set
        saved.setdefault('ResponseMetadata', {})['HTTPStatusCode'] = 500

    return {
        'body': json.dumps(data),
        'statusCode': saved['ResponseMetadata']['HTTPStatusCode'],
        'isBase64Encoded': False,
        'headers': __HTTP_HEADERS__
    }
def test_get(requireMocking):
    """Fetching a stored tracking record returns 200 with the record body."""
    # store data
    manager = TableManager()
    manager.get_tracking_table().put_item(Item={
        "id": "test",
        "experiment": "unknown",
        "sample": "test",
        "status": [{"time": 1524772162698, "value": "processing"}]
    })

    # process data
    response = get.get({
        "pathParameters": {
            "sample": "test"
        }
    }, {})

    assert 200 == response['statusCode']
    assert 'body' in response
    assert "test" == json.loads(response['body'])["id"]
def test_get_with_fileHandle(requireMocking):
    """A status entry carrying a fileHandle is returned verbatim by get."""
    # store data
    manager = TableManager()
    manager.get_tracking_table().put_item(Item={
        "id": "test",
        "experiment": "unknown",
        "sample": "test",
        "status": [{"time": 1524772162698, "value": "PROCESSING", "fileHandle": "test.mzml"}]
    })

    # process data
    response = get.get({
        "pathParameters": {
            "sample": "test"
        }
    }, {})

    assert 200 == response['statusCode']
    assert 'body' in response

    payload = json.loads(response['body'])
    assert 'test' == payload['id']
    assert 'test.mzml' == payload['status'][0]['fileHandle']
def get(events, context):
    """returns the specific sample from the storage"""
    if 'pathParameters' not in events:
        return {
            "statusCode": 404,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": "not supported, need's be called from a http event!"})
        }
    if 'sample' not in events['pathParameters']:
        return {
            "statusCode": 422,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": "sample name is not provided!"})
        }

    tm = TableManager()
    table = tm.get_tracking_table()
    result = table.query(
        KeyConditionExpression=Key('id').eq(events['pathParameters']['sample'])
    )

    items = result.get('Items', [])
    if items:
        # create a response when sample is found
        return {
            "statusCode": 200,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps(items[0])
        }
    # create a response when sample is not found
    return {
        "statusCode": 404,
        "headers": __HTTP_HEADERS__,
        "body": json.dumps({"error": "sample not found"})
    }
def remove_sample_for_job(event, context):
    """ remove a sample from a stored job """
    logger.info(event)

    if 'pathParameters' not in event:
        return {'statusCode': 503}

    parameters = event['pathParameters']
    if 'sample' not in parameters or 'job' not in parameters:
        return {'statusCode': 503}

    tm = TableManager()
    rid = tm.generate_job_id(parameters['job'], parameters['sample'])
    trktable = tm.get_job_sample_state_table()
    logger.info(f"generated id: {rid}")

    try:
        # save or update our item
        saved = trktable.delete_item(Key={
            'id': rid,
            'job': parameters['job']
        })
        logger.info(saved)
        return {
            'body': '',
            'statusCode': 200,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    except Exception as e:
        traceback.print_exc()
        return {
            'body': str(e),
            'statusCode': 500,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
def get(events, context):
    """returns paged list with the latest status for the samples in the given experiment"""
    if 'pathParameters' not in events:
        return {
            "statusCode": 404,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({
                "error": "not supported, need's be called from a http event!"
            })
        }
    if 'experiment' not in events['pathParameters']:
        return {
            "statusCode": 422,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": "sample name is not provided!"})
        }

    path = events['pathParameters']
    expId = path['experiment']
    # optional page size, defaulting to 25
    page_size = int(path['psize']) if 'psize' in path else 25

    query_params = {
        'IndexName': 'experiment-id-index',
        'Select': 'ALL_ATTRIBUTES',
        'KeyConditionExpression': Key('experiment').eq(expId),
        'Limit': page_size
    }

    # continuation request: resume the query after the given sample id
    if 'lastSample' in path:
        logger.info(
            f"Not the first page // {events['pathParameters']['lastSample']}"
        )
        query_params['ExclusiveStartKey'] = {
            "experiment": expId,
            "id": path['lastSample']
        }

    tm = TableManager()
    table = tm.get_acquisition_table()

    try:
        result = table.query(**query_params)
        body = {"items": result['Items']}
        # expose the continuation key so the caller can request the next page
        if 'LastEvaluatedKey' in result:
            body['last_item'] = result['LastEvaluatedKey']
        return {
            'statusCode': 200,
            'headers': __HTTP_HEADERS__,
            'body': json.dumps(body)
        }
    except Exception as ex:
        logger.info("QUERY-ERROR: %s" % str(ex))
        return {
            "statusCode": 418,
            "headers": __HTTP_HEADERS__,
            "body": json.dumps({"error": ex.args})
        }
def __init__(self, platform="fargate"):
    """Initialize with a table manager and the target compute platform.

    :param platform: execution platform identifier, defaults to "fargate"
    """
    self.tm = TableManager()  # shared access to the dynamo tables
    self.platform = platform
def status(event, context):
    """Return (GET) or update (non-GET with body) the status of a job.

    TODO due to expense it might be better to store the whole calculation
    result in an additional table, since it can take a LONG time to execute
    and so not perfect as solution for http requests
    """
    if 'pathParameters' in event:
        parameters = event['pathParameters']
        if 'job' in parameters:
            job = parameters['job']
            tm = TableManager()
            table_overall_state = tm.get_job_state_table()

            if 'body' in event and event.get("httpMethod", "") != 'GET':
                # write path: a non-GET request with a body updates the job state
                content = json.loads(event['body'])
                result = update_job_state(job, content['job_state'],
                                          content.get("reason", ""))
                return {
                    "statusCode": 200,
                    "headers": __HTTP_HEADERS__,
                    # NOTE(review): job_state and job_info are both set to the
                    # update result here — presumably intentional; confirm
                    "body": json.dumps({
                        "job_state": result,
                        "job_info": result
                    }
                    )
                }
            else:
                # read path: look the job up by id on the job-id-state index
                job_state = table_overall_state.query(
                    **{
                        'IndexName': 'job-id-state-index',
                        'Select': 'ALL_ATTRIBUTES',
                        'KeyConditionExpression': Key('job').eq(job)
                    }
                )

                # this queries the state of all the samples
                if "Items" in job_state and len(job_state['Items']) > 0:
                    job_state = job_state["Items"]
                    if len(job_state) > 0:
                        # first matching item carries the overall state
                        job_state = job_state[0]
                        return {
                            "statusCode": 200,
                            "headers": __HTTP_HEADERS__,
                            "body": json.dumps({
                                "job_state": job_state['state'],
                                "job_info": job_state
                            }
                            )
                        }
                    else:
                        return {
                            "statusCode": 503,
                            "headers": __HTTP_HEADERS__,
                            "body": json.dumps({
                                "job_state": "no associated state found!",
                            }
                            )
                        }
        else:
            return {
                "statusCode": 404,
                "headers": __HTTP_HEADERS__,
                "body": json.dumps({"error": "no job found with this identifier : {}".format(
                    event['pathParameters']['job'])})
            }

    # invalid!
    return {
        'statusCode': 503
    }
def description(event, context):
    """Return the complete job description, which can be rather long.

    Paged via the optional 'psize' and 'last_key' path parameters; each
    returned sample is enriched with its tracking history and the
    highest-priority tracked state.
    """
    if 'pathParameters' in event:
        parameters = event['pathParameters']
        if 'job' in parameters:
            job = parameters['job']
            tm = TableManager()
            table = tm.get_job_sample_state_table()

            if 'psize' in event['pathParameters']:
                page_size = int(event['pathParameters']['psize'])
            else:
                page_size = 10

            query_params = {
                'IndexName': 'job-id-index',
                'Select': 'ALL_ATTRIBUTES',
                'KeyConditionExpression': Key('job').eq(job),
                'Limit': page_size
            }

            if 'last_key' in event['pathParameters']:
                logger.info(f"pagination mode, last key was {event['pathParameters']['last_key']}")
                query_params['ExclusiveStartKey'] = {
                    "job": job,
                    "id": event['pathParameters']['last_key']
                }

            result = table.query(**query_params)

            if "Items" in result and len(result['Items']) > 0:
                # here we now need to reference the actual stasis tracking table
                result = result['Items']
                result = list(filter(lambda x: x is not None, result))

                # kinda expensive and should be avoided
                final_result = []
                for x in result:
                    tracked_sample = get_tracked_sample(x['sample'])
                    if tracked_sample is None:
                        logger.info(f"the tracked sample was not found: {x['sample']}")
                    else:
                        # PERF FIX: reuse the record fetched above instead of
                        # calling get_tracked_sample a second time per sample
                        x['history'] = tracked_sample['status']
                        x['state'] = max(x['history'], key=lambda y: y['priority'])['value']
                        final_result.append(x)

                return {
                    "statusCode": 200,
                    "headers": __HTTP_HEADERS__,
                    "body": json.dumps(
                        final_result
                    )
                }
            else:
                return {
                    "statusCode": 404,
                    "headers": __HTTP_HEADERS__,
                    "body": json.dumps({"error": "no job found with this identifier : {}".format(
                        event['pathParameters']['job'])})
                }

    # invalid!
    return {
        'statusCode': 503
    }
def __init__(self):
    """Create the helper with access to the configuration table."""
    self.tm = TableManager()  # table factory for the dynamo backend
    self.table = self.tm.get_configuration_table()  # configuration storage handle
def test_sync_currently_processing(requireMocking, mocked_10_sample_job):
    """After seeding 10 fully-tracked samples, calculate_job_state resolves
    every sample of job 12345 to the REPLACED state."""
    tm = TableManager()
    for i in range(0, 10):
        # register each sample against the job in state SCHEDULED
        tracking.create({'body': json.dumps(
            {
                "job": "12345",
                "sample": "abc_{}".format(i),
                "state": SCHEDULED
            }
        )}, {})

        assert load_job_samples_with_states("12345")['abc_{}'.format(i)] == "scheduled"

        # dummy stasis data which need to be in the system for this test to pass
        # (full tracking history from 'entered' through 'replaced', ordered by
        # ascending priority)
        tm.get_tracking_table().put_item(Item=
        {
            "experiment": "12345",
            "id": "abc_{}".format(i),
            "sample": "abc_{}".format(i),
            "status": [
                {
                    "fileHandle": "abc_{}.d".format(i),
                    "priority": 1,
                    "time": 1563307359163,
                    "value": "entered"
                },
                {
                    "fileHandle": "abc_{}.d".format(i),
                    "priority": 100,
                    "time": 1563307360393,
                    "value": "acquired"
                },
                {
                    "fileHandle": "abc_{}.mzml".format(i),
                    "priority": 200,
                    "time": 1563307361543,
                    "value": "converted"
                },
                {
                    "fileHandle": "abc_{}.mzml".format(i),
                    "priority": 300,
                    "time": 1563330092360,
                    "value": "scheduled"
                },
                {
                    "fileHandle": "abc_{}.mzml".format(i),
                    "priority": 410,
                    "time": 1563330183632,
                    "value": "deconvoluted"
                },
                {
                    "fileHandle": "abc_{}.mzml".format(i),
                    "priority": 420,
                    "time": 1563330184868,
                    "value": "corrected"
                },
                {
                    "fileHandle": "abc_{}.mzml".format(i),
                    "priority": 430,
                    "time": 1563330189108,
                    "value": "annotated"
                },
                {
                    "fileHandle": "abc_{}.mzml".format(i),
                    "priority": 440,
                    "time": 1563330190650,
                    "value": "quantified"
                },
                {
                    "fileHandle": "abc_{}.mzml".format(i),
                    "priority": 450,
                    "time": 1563330244348,
                    "value": "replaced"
                }
            ]
        }
        )

    # recompute the job state from the tracking histories
    calculate_job_state(job="12345")

    # every sample should now report the highest-priority state: replaced
    assert all(value == str(REPLACED) for value in load_job_samples_with_states("12345").values())
def store_sample_for_job(event, context):
    """Store an associated sample for a job.

    Validates the request body against __SAMPLE_JOB_SCHEMA__, optionally
    replays provided tracking states, merges any supplied metadata into the
    sample's acquisition record (creating a stub record when none exists),
    and finally marks the sample as SCHEDULING for the job.

    :param event: http event whose 'body' holds the sample/job payload
    :param context: lambda context (unused)
    :return: http-style response dict (200, or 503/500 with a FAILED reason)
    """
    body = json.loads(event['body'])

    try:
        validate(body, __SAMPLE_JOB_SCHEMA__)
    except Exception as e:
        # schema violation: reject without touching any state
        logger.info(f"received body was considered invalid: {body}")
        traceback.print_exc()
        return {
            'body': json.dumps({
                'state': str(FAILED),
                'reason': str(e)
            }),
            'statusCode': 503,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }

    # tracking entries are carried inside meta but handled separately
    tracking = body.get('meta', {}).get('tracking', [])
    meta = body.get('meta', {})
    meta.pop('tracking', None)
    sample = body.get('sample')
    job = body.get("job")

    try:
        # overwrite tracking states and extension if it's provided
        for track in tracking:
            if 'extension' in track:
                fileHandle = "{}.{}".format(sample, track['extension'])
            else:
                fileHandle = None

            save_sample_state(sample=sample, state=track['state'],
                              fileHandle=fileHandle)

        if len(meta) > 0:
            # update the class here by updating the acquisition part
            # 1. get acquisition data
            # 2. set new metadata
            tm = TableManager()
            acqtable = tm.get_acquisition_table()
            result = acqtable.query(
                KeyConditionExpression=Key('id').eq(sample))

            if "Items" in result and len(result['Items']) > 0:
                data = result['Items'][0]
            else:
                # no acquisition record yet: build a minimal stub to merge into
                timestamp = int(time.time() * 1000)
                data = {
                    'time': timestamp,
                    'id': sample,
                    'sample': sample,
                    'experiment': _fetch_experiment(sample),
                    'acquisition': {
                        'instrument': 'unknown',
                        'ionisation': 'unknown',
                        'method': 'unknown'
                    },
                    'processing': {
                        'method': 'unknown'
                    },
                }

            try:
                # deep-merge the incoming metadata over the existing record
                data = Merge().data_merge(data, meta)
                store_acquisition_data(data, tm)
            except Exception as e:
                logger.info(f"generated data was considered invalid: {data}")
                traceback.print_exc()
                return {
                    'body': json.dumps({
                        'state': str(FAILED),
                        'reason': str(traceback.format_exc())
                    }),
                    'statusCode': 503,
                    'isBase64Encoded': False,
                    'headers': __HTTP_HEADERS__
                }

        set_sample_job_state(job=job, sample=sample, state=SCHEDULING)

        return {
            'body': json.dumps({
                'state': str(SCHEDULING),
                'job': job,
                'sample': sample,
                'reason': 'sample was submitted'
            }),
            'statusCode': 200,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    except Exception as e:
        # update job state in the system to failed with the related reason
        error_diagnostics = traceback.format_exc()
        set_sample_job_state(job=job, sample=sample, state=FAILED,
                             reason=f"{str(e)} = {error_diagnostics}")
        traceback.print_exc()
        return {
            'body': json.dumps({
                'state': str(FAILED),
                'job': job,
                'sample': sample,
                'reason': str(e)
            }),
            'statusCode': 500,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }