Example 1
import json
import time

# get_file_by_handle, get_tracked_state, save_sample_state, insert_stall_event
# and logger are provided by the surrounding module


def track_sample_state_on_stopped_job(detail, event):
    """
    Persists all the definitions of a sample and its final state. If the state
    is not a finished state, the sample is automatically set to failed.
    """

    overrides = detail['overrides']
    container = detail['containers'][0]

    env = overrides['containerOverrides'][0]['environment']

    # collect the container environment variables into a dict
    data = {entry['name']: entry['value'] for entry in env}

    data['start_time'] = detail['startedAt']
    data['end_time'] = detail['stoppedAt']

    # determine the tracked state of the sample, compare it against the
    # acceptable finish states, and if it's wrong, mark the sample as failed

    if "CARROT_SAMPLE" in data:
        data['sample'] = data['CARROT_SAMPLE']
        data['id'] = event['id']
        data['timestamp'] = time.time()
        sample = get_file_by_handle(data['CARROT_SAMPLE'])
        state = get_tracked_state(sample)

        if state in ['uploaded', 'failed']:
            # all good
            logger.info(
                f"task finished in an acceptable state: {sample} with {state}")
        else:
            data['observed_state'] = state
            logger.warning(
                f"task for {sample} finished in state {state}, which would stall operations; manually setting it to failed"
            )
            save_sample_state(
                sample, "failed", data['CARROT_SAMPLE'],
                f"the final state for this file was '{state}', which was not sufficient and would stall the computations",
                event)
            insert_stall_event(data)
    else:
        logger.info(
            f"not a sample-based task, moving on: {json.dumps(data, indent=4)}"
        )
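
For reference, a minimal sketch of the ECS "Task State Change" payload this
handler expects, derived from the fields it reads; all values here are
illustrative, not taken from a real event:

event = {
    'id': 'event-uuid',
    'detail': {
        'containers': [{'name': 'carrot-runner', 'exitCode': 0}],
        'overrides': {
            'containerOverrides': [{
                'environment': [
                    {'name': 'CARROT_SAMPLE', 'value': 'abc_0'},
                    {'name': 'CARROT_METHOD', 'value': 'some-method'},
                ]
            }]
        },
        'startedAt': '2020-01-01T00:00:00Z',
        'stoppedAt': '2020-01-01T00:05:00Z',
    }
}
track_sample_state_on_stopped_job(event['detail'], event)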
Example 2
import simplejson as json  # simplejson is assumed: json.dumps(..., use_decimal=True) below
from jsonschema import validate, ValidationError  # assumed validation library


def triggerEvent(data):
    """
    Submits the given data to the table (previously a queue).

    :param data: requires 'sample' and 'status' keys to be considered valid
    :return: a serialized version of the submitted message
    """
    try:
        validate(data, __TRACKING_SCHEMA__)
    except ValidationError as e:
        return {
            'body': json.dumps({
                'error': str(e),
                'content': data
            }),
            'statusCode': 500,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }

    item = save_sample_state(sample=data['sample'],
                             state=data['status'],
                             fileHandle=data.get('fileHandle', None),
                             reason=data.get('reason',
                                             data.get('failed', None)),
                             optional=data.get('optional', None))[0]

    return {
        'body': json.dumps(item, use_decimal=True),
        'statusCode': 200,
        'isBase64Encoded': False,
        'headers': __HTTP_HEADERS__
    }
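
__TRACKING_SCHEMA__ itself is not shown in this example; judging from the
docstring and the fields accessed afterwards, a plausible jsonschema definition
would look roughly like this sketch:

__TRACKING_SCHEMA__ = {
    'type': 'object',
    'properties': {
        'sample': {'type': 'string'},
        'status': {'type': 'string'},
        'fileHandle': {'type': 'string'},
        'reason': {'type': 'string'},
        'optional': {'type': 'object'}
    },
    'required': ['sample', 'status']
}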
Example 3
import json
import time
import traceback

# TableManager, store_acquisition_data, save_sample_state, validate,
# ValidationException, logger and __HTTP_HEADERS__ come from the surrounding module


def triggerEvent(data):
    """
    Submits the given data to the acquisition table (previously a queue).

    :param data: requires a 'sample' key
    :return: a serialized version of the submitted message
    """
    logger.info(f"trigger event: {json.dumps(data)}")
    saved = {}

    try:
        validate(data, __ACQUISITION_SCHEMA__)

        timestamp = int(time.time() * 1000)
        data['time'] = timestamp
        data['id'] = data['sample']

        # put item in table instead of queueing
        tm = TableManager()
        saved = store_acquisition_data(data, tm)

        tracked = save_sample_state(sample=data['sample'],
                                    state='entered',
                                    fileHandle=None,
                                    reason="new acquisition entered",
                                    optional=None)

        logger.info("added tracking for acquisition data")

    except ValidationException as vex:
        traceback.print_exc()
        data = str(vex.body)
        # saved is still empty at this point, so the metadata entry has to be created
        saved.setdefault('ResponseMetadata', {})['HTTPStatusCode'] = 400
    except Exception as ex:
        traceback.print_exc()
        data = str(ex)
        saved.setdefault('ResponseMetadata', {})['HTTPStatusCode'] = 500

    return {
        'body': json.dumps(data),
        'statusCode': saved['ResponseMetadata']['HTTPStatusCode'],
        'isBase64Encoded': False,
        'headers': __HTTP_HEADERS__
    }
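
A hypothetical invocation of this handler; apart from 'sample', the required
payload fields depend on __ACQUISITION_SCHEMA__, which is not shown here:

response = triggerEvent({
    'sample': 'abc_0',
    'experiment': 'unknown',  # assumed optional metadata
    'acquisition': {'instrument': 'unknown', 'ionisation': 'unknown',
                    'method': 'unknown'}
})
assert response['statusCode'] in (200, 400, 500)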
Example 4
def bucket_json(event, context):
    """
    Handles JSON bucket trigger events (S3 object-created notifications).
    :param event:
    :param context:
    :return:
    """

    logger.info(f"bucket json got trigger: {event}")

    if len(event['Records']) == 0:
        logger.info("no records found!")
        return
    for record in event['Records']:
        logger.info(f"received record: {record}")
        o = record['s3']['object']
        k = str(o['key'])

        logger.info(f"received key {k}")
        sample = get_file_by_handle(k)
        logger.info(f"sample was uploaded: {sample}")
        result = save_sample_state(
            sample=sample,
            state=UPLOADED,
            fileHandle=k,
            reason="processed file was uploaded to the bucket")

        if result is None:
            logger.info(f"we were not able to update the sample: {sample}")
        else:
            logger.info(f"sample state was set to: {result}")
            jobs = load_jobs_for_sample(sample)

            if jobs is not None:
                logger.info(f"found {len(jobs)} associated jobs for this sample")
                # local import, presumably to avoid a circular dependency
                from stasis.jobs.sync import sync_job
                for job in jobs:
                    sync_job(job=job)
            else:
                logger.info("we did not find a job for this sample!")
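
The handler consumes a standard S3 event notification; a minimal hand-built
payload for local testing could look like this (the object key is illustrative):

bucket_json({
    'Records': [
        {'s3': {'object': {'key': 'abc_0.mzml.json'}}}
    ]
}, {})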
Example 5
import json
import os
import traceback

from jsonschema import validate, ValidationError  # assumed validation library


def schedule_processing_to_fargate(event, context):
    """
    Submits a new task to the cluster; a Fargate task will run it.
    :param event:
    :param context:
    :return:
    """
    body = json.loads(event['body'])

    try:
        validate(body, __SCHEDULE__)

        overrides = {
            "containerOverrides": [{
                "name": "carrot-runner",
                "environment": [
                    {
                        # the aws profile needs to be active for this system
                        # to connect to the AWS database
                        "name": "SPRING_PROFILES_ACTIVE",
                        "value": "{}{},{}".format('aws',
                                                  os.getenv('current_stage'),
                                                  body["profile"])
                    },
                    {
                        "name": "CARROT_SAMPLE",
                        "value": str(body['sample'])
                    },
                    {
                        "name": "CARROT_METHOD",
                        "value": str(body['method'])
                    },
                    {
                        "name": "CARROT_MODE",
                        "value": str(body['profile'])
                    },
                ]
            }]
        }

        task_name = "{}-{}".format(os.getenv("current_stage"),
                                   SECURE_CARROT_RUNNER)

        if 'key' in body and body['key'] is not None:
            overrides['containerOverrides'][0]['environment'].append({
                "name": "STASIS_KEY",
                "value": body['key']
            })

        send_to_fargate(overrides=overrides, task_name=task_name)

        save_sample_state(sample=body['sample'],
                          state='scheduled',
                          reason='discovered by fargate runner',
                          fileHandle=None)

        return {
            'statusCode': 200,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }

    except ValidationError as e:
        logger.info("validation error")
        logger.info(body)
        traceback.print_exc()

        return {
            'body': json.dumps(str(e)),
            'statusCode': 503,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    except Exception as e:
        logger.info(body)
        traceback.print_exc()
        create(
            {
                "body": json.dumps({
                    'sample': body['sample'],
                    'status': FAILED,
                    'reason': str(e)
                })
            }, {})

        return {
            'body': json.dumps(str(e)),
            'statusCode': 503,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
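
send_to_fargate itself is not part of this example; a minimal sketch of what it
might do with boto3, assuming the cluster name and networking details are
resolved elsewhere (all placeholder values are hypothetical):

import os
import boto3

def send_to_fargate(overrides, task_name):
    # illustrative only: run the named task definition on Fargate with the
    # given container overrides
    client = boto3.client('ecs')
    client.run_task(
        cluster=os.getenv('ECS_CLUSTER', 'carrot'),  # placeholder cluster name
        taskDefinition=task_name,
        launchType='FARGATE',
        overrides=overrides,
        networkConfiguration={
            'awsvpcConfiguration': {
                'subnets': [os.getenv('SUBNET_ID', 'subnet-123456')],  # placeholder
                'assignPublicIp': 'ENABLED'
            }
        }
    )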
Example 6
def test_calculate_job_state_with_zip_upload(requireMocking,
                                             mocked_10_sample_job):
    # set 1 sample state to exported
    save_sample_state(sample="abc_0", state=EXPORTED)
    # this should move the job state to PROCESSING
    sync_job(get_job_config("12345"))
    state = get_job_state("12345")
    assert state == PROCESSING
    # set sample state to failed
    save_sample_state(sample="abc_1", state=FAILED)
    sync_job(get_job_config("12345"))
    # this should keep the job state at PROCESSING
    state = get_job_state("12345")
    assert state == PROCESSING

    # set all other samples to exported

    save_sample_state(sample="abc_2", state=EXPORTED)
    save_sample_state(sample="abc_3", state=EXPORTED)
    save_sample_state(sample="abc_4", state=EXPORTED)
    save_sample_state(sample="abc_5", state=EXPORTED)
    save_sample_state(sample="abc_6", state=EXPORTED)
    save_sample_state(sample="abc_7", state=EXPORTED)
    save_sample_state(sample="abc_8", state=EXPORTED)
    save_sample_state(sample="abc_9", state=EXPORTED)
    sync_job(get_job_config("12345"))
    # this should now set the job state to AGGREGATING_SCHEDULED
    state = get_job_state("12345")
    assert state == AGGREGATING_SCHEDULED

    # trigger an upload to the zip bucket
    bucket_zip({'Records': [{'s3': {'object': {'key': '12345.zip'}}}]}, {})

    # job should now be aggregated
    state = get_job_state("12345")
    assert state == AGGREGATED_AND_UPLOADED
Example 7
def test_calculate_job_state_2(requireMocking, mocked_10_sample_job):
    result = schedule_job({'pathParameters': {"job": "12345"}}, {})

    watch_job_schedule_queue()
    state = get_job_state("12345")
    assert state == SCHEDULED

    # set 1 sample state to failed
    save_sample_state(sample="abc_0", state=FAILED)
    sync_job(get_job_config("12345"))
    # this should move the job state to PROCESSING
    state = get_job_state("12345")

    assert state == PROCESSING
    # set sample state to failed
    save_sample_state(sample="abc_1", state=FAILED)
    # this should keep the job state at PROCESSING
    sync_job(get_job_config("12345"))
    state = get_job_state("12345")
    assert state == PROCESSING

    # set all other samples to exported

    save_sample_state(sample="abc_2", state=EXPORTED)
    save_sample_state(sample="abc_3", state=EXPORTED)
    save_sample_state(sample="abc_4", state=EXPORTED)
    save_sample_state(sample="abc_5", state=EXPORTED)
    save_sample_state(sample="abc_6", state=EXPORTED)
    save_sample_state(sample="abc_7", state=EXPORTED)
    save_sample_state(sample="abc_8", state=EXPORTED)
    save_sample_state(sample="abc_9", state=EXPORTED)
    sync_job(get_job_config("12345"))
    # this should now set the job state to AGGREGATING_SCHEDULED
    state = get_job_state("12345")
    assert state == AGGREGATING_SCHEDULED
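
Both tests encode the same aggregation rule. Expressed directly, the decision
sync_job is expected to make looks roughly like this (a sketch, not the actual
implementation):

def derive_job_state(sample_states):
    # a job stays in PROCESSING while any sample is still pending; once every
    # sample is EXPORTED or FAILED, aggregation can be scheduled
    done = {EXPORTED, FAILED}
    if all(state in done for state in sample_states):
        return AGGREGATING_SCHEDULED
    return PROCESSING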
Example 8
import json
import time
import traceback

from boto3.dynamodb.conditions import Key


def store_sample_for_job(event, context):
    """
    Stores an associated sample for a job.
    :param event:
    :param context:
    :return:
    """

    body = json.loads(event['body'])
    try:
        validate(body, __SAMPLE_JOB_SCHEMA__)
    except Exception as e:
        logger.info(f"received body was considered invalid: {body}")
        traceback.print_exc()

        return {
            'body': json.dumps({
                'state': str(FAILED),
                'reason': str(e)
            }),
            'statusCode': 503,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    tracking = body.get('meta', {}).get('tracking', [])
    meta = body.get('meta', {})
    meta.pop('tracking', None)

    sample = body.get('sample')
    job = body.get("job")

    try:
        # overwrite tracking states and extension if it's provided
        for track in tracking:
            if 'extension' in track:
                fileHandle = "{}.{}".format(sample, track['extension'])
            else:
                fileHandle = None

            save_sample_state(sample=sample,
                              state=track['state'],
                              fileHandle=fileHandle)

        if len(meta) > 0:
            # update the class here by updating the acquisition part
            # 1. get acquisition data
            # 2. set new metadata
            tm = TableManager()
            acqtable = tm.get_acquisition_table()
            result = acqtable.query(
                KeyConditionExpression=Key('id').eq(sample))

            if "Items" in result and len(result['Items']) > 0:
                data = result['Items'][0]
            else:

                timestamp = int(time.time() * 1000)
                data = {
                    'time': timestamp,
                    'id': sample,
                    'sample': sample,
                    'experiment': _fetch_experiment(sample),
                    'acquisition': {
                        'instrument': 'unknown',
                        'ionisation': 'unknown',
                        'method': 'unknown'
                    },
                    'processing': {
                        'method': 'unknown'
                    },
                }

            try:
                data = Merge().data_merge(data, meta)
                store_acquisition_data(data, tm)
            except Exception as e:
                logger.info(f"generated data was considered invalid: {data}")
                traceback.print_exc()

                return {
                    'body': json.dumps({
                        'state': str(FAILED),
                        'reason': str(traceback.format_exc())
                    }),
                    'statusCode': 503,
                    'isBase64Encoded': False,
                    'headers': __HTTP_HEADERS__
                }

        set_sample_job_state(job=job, sample=sample, state=SCHEDULING)

        return {
            'body': json.dumps({
                'state': str(SCHEDULING),
                'job': job,
                'sample': sample,
                'reason': 'sample was submitted'
            }),
            'statusCode': 200,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
    except Exception as e:
        # update job state in the system to failed with the related reason

        error_diagnostics = traceback.format_exc()
        set_sample_job_state(job=job,
                             sample=sample,
                             state=FAILED,
                             reason=f"{str(e)} = {error_diagnostics}")

        traceback.print_exc()
        return {
            'body': json.dumps({
                'state': str(FAILED),
                'job': job,
                'sample': sample,
                'reason': str(e)
            }),
            'statusCode': 500,
            'isBase64Encoded': False,
            'headers': __HTTP_HEADERS__
        }
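
A hypothetical request body for this handler, derived from the fields it reads;
the tracking states and metadata shape are assumptions:

event = {
    'body': json.dumps({
        'sample': 'abc_0',
        'job': '12345',
        'meta': {
            'tracking': [
                {'state': 'entered'},
                {'state': 'acquired', 'extension': 'mzml'}
            ],
            'acquisition': {'instrument': 'unknown'}
        }
    })
}
store_sample_for_job(event, {})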