def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    extra_params = {
        'beaconId': BEACON_ID,
    }
    if event['httpMethod'] == 'POST':
        event_body = event.get('body')
        if not event_body:
            return bad_request('No body sent with request.', extra_params)
        try:
            parameters = json.loads(event_body)
        except ValueError:
            return bad_request('Error parsing request body, Expected JSON.',
                               extra_params)
    else:  # method == 'GET'
        parameters = event['queryStringParameters']
        if not parameters:
            return bad_request('No query parameters sent with request.',
                               extra_params)
        # datasetIds may be supplied more than once, so read it from the
        # multi-value view of the query string
        multi_values = event['multiValueQueryStringParameters']
        parameters['datasetIds'] = multi_values.get('datasetIds')
        for int_field in ('start', 'end', 'startMin', 'startMax', 'endMin',
                          'endMax'):
            if int_field in parameters:
                try:
                    parameters[int_field] = int(parameters[int_field])
                except ValueError:
                    # Cannot be formatted as an integer, handle in validation
                    pass
    return query_datasets(parameters)
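
Every handler in these examples returns through two helpers, bad_request() and bundle_response(), whose definitions are not included in the excerpts. A minimal sketch of plausible implementations, assuming standard API Gateway Lambda-proxy responses (the exact error fields are an assumption, not the project's actual code):

import json

def bundle_response(status_code, body):
    # Wrap a payload in the Lambda-proxy response shape that API Gateway expects.
    return {
        'statusCode': status_code,
        'headers': {'Content-Type': 'application/json'},
        'body': json.dumps(body),
    }

def bad_request(message, extra_params=None):
    # Return a 400 response, merging in any beacon-specific extra fields.
    error = dict(extra_params or {})
    error['message'] = message
    return bundle_response(400, error)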
Example 2
def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    extra_params = {
        'beaconId': BEACON_ID,
    }
    if event['httpMethod'] == 'POST':
        event_body = event.get('body')
        if not event_body:
            return bad_request('No body sent with request.', extra_params)
        try:
            parameters = json.loads(event_body)
        except ValueError:
            return bad_request('Error parsing request body, Expected JSON.',
                               extra_params)
        parameters['queries'] = get_queries(parameters)
    else:  # method == 'GET'
        parameters = event['queryStringParameters']
        if not parameters:
            return bad_request('No query parameters sent with request.',
                               extra_params)
        multi_values = event['multiValueQueryStringParameters']
        parameters['queries'] = get_queries(multi_values)
        parameters['datasetIds'] = multi_values.get('datasetIds')
        parameters['sampleFields'] = multi_values.get('sampleFields')
        for int_field in ('page', 'pageSize', 'variantsDescending', 'similar'):
            if int_field in parameters:
                parameters[int_field] = int_or_self(parameters[int_field])
    return query_datasets(parameters, context)
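
Example 2 additionally relies on int_or_self(), which is not shown. Judging from how it is applied to fields such as 'page' and 'pageSize', a reasonable sketch is the following (an inference from usage, not the project's own definition):

def int_or_self(value):
    # Convert to an int when possible; otherwise return the value unchanged
    # so that request validation can report the offending field later.
    try:
        return int(value)
    except (TypeError, ValueError):
        return value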
Example 3
def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    event_body = event.get('body')
    if not event_body:
        return bad_request('No body sent with request.')
    try:
        body_dict = json.loads(event_body)
    except ValueError:
        return bad_request('Error parsing request body, Expected JSON.')
    method = event['httpMethod']
    return submit_dataset(body_dict, method)
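
These handlers can be exercised locally with a hand-built API Gateway proxy event; a minimal sketch, where the body fields are purely illustrative and the helpers the handler calls are assumed to be importable:

if __name__ == '__main__':
    import json
    test_event = {
        'httpMethod': 'POST',
        'body': json.dumps({'id': 'test-dataset'}),  # illustrative payload
    }
    # context is unused by this handler, so None suffices for a local run
    print(lambda_handler(test_event, None))

Example 4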
def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    time_assigned = context.get_remaining_time_in_millis() - MILLISECONDS_BEFORE_SPLIT
    print("time assigned", time_assigned)
    timeStart = time.time()
    event_body = event.get('body')
    if not event_body:
        return bad_request('No body sent with request.')
    try:
        body_dict = json.loads(event_body)
        requestID = event['requestContext']['requestId']
        location = body_dict['location']
        vcf_regions = get_translated_regions(location)
    except ValueError:
        return bad_request('Error parsing request body, Expected JSON.')

    batchID = ''
    print(vcf_regions)
    for index, region in enumerate(vcf_regions):
        # If this invocation is close to its time limit, hand the remaining
        # regions off to a new invocation rather than timing out mid-way.
        if (time.time() - timeStart) * 1000 > time_assigned:
            newRegions = vcf_regions[index:]
            batchID = ''
            print("New Regions ",newRegions)
            # Publish an SNS message so this function re-invokes itself
            # with the regions that have not yet been processed.
            kwargs = {
                'TopicArn': QUERY_VCF_EXTENDED_SNS_TOPIC_ARN,
            }
            kwargs['Message'] = json.dumps({'regions': newRegions, 'requestID': requestID, 'location': location})
            print('Publishing to SNS: {}'.format(json.dumps(kwargs)))
            response = sns.publish(**kwargs)
            print('Received Response: {}'.format(json.dumps(response)))
            break
        else:
            chrom, start_str = region.split(':')
            regionID = chrom + "_" + start_str
            # Region strings give the start offset in megabases; convert the
            # slice to 1-based base-pair coordinates.
            start = round(1000000 * float(start_str) + 1)
            end = start + round(1000000 * SLICE_SIZE_MBP - 1)
            all_coords, all_changes = get_regions_and_variants(location, chrom, start, end, time_assigned)
            # Split the records into fixed-size batches before submitting them.
            total_coords = [all_coords[x:x + RECORDS_PER_SAMPLE] for x in range(0, len(all_coords), RECORDS_PER_SAMPLE)]
            total_changes = [all_changes[x:x + RECORDS_PER_SAMPLE] for x in range(0, len(all_changes), RECORDS_PER_SAMPLE)]
            batchID = submitQueryGTF(total_coords, total_changes, requestID, regionID)
    if batchID != '':
        print("sending for concat")
        kwargs = {
            'TopicArn': CONCAT_SNS_TOPIC_ARN,
        }
        kwargs['Message'] = json.dumps({'APIid': requestID, 'lastBatchID': batchID})
        print('Publishing to SNS: {}'.format(json.dumps(kwargs)))
        response = sns.publish(**kwargs)
        print('Received Response: {}'.format(json.dumps(response)))

    return bundle_response(200, "Process started")
Example 5
def submit_dataset(body_dict, method):
    new = method == 'POST'
    validation_error = validate_request(body_dict, new)
    if validation_error:
        return bad_request(validation_error)
    if 'vcfLocations' in body_dict:
        errors = check_vcf_locations(body_dict['vcfLocations'])
        if errors:
            return bad_request(errors)
        summarise = True
    else:
        summarise = False
    if new:
        create_dataset(body_dict)
    else:
        update_dataset(body_dict)
    if summarise:
        summarise_dataset(body_dict['id'])
    return bundle_response(200, {})
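
From the fields referenced above, an illustrative request body for this handler might look like the following; the values are invented, and validate_request (not shown) enforces the full schema:

example_body = {
    'id': 'my-dataset',                                   # used by summarise_dataset
    'vcfLocations': ['s3://example-bucket/chr1.vcf.gz'],  # optional; triggers summarisation
}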
Example 6
def query_datasets(parameters, context):
    response_dict = {
        'beaconId': BEACON_ID,
        'apiVersion': None,
        'alleleRequest': parameters,
    }
    validation_error = validate_request(parameters)
    if validation_error:
        return bad_request(validation_error, response_dict)

    responses = Caches(
        dynamodb_client=dynamodb,
        lambda_client=aws_lambda,
        s3_client=s3,
    )
    datasets = get_datasets(
        parameters['assemblyId'],
        [query['referenceName'] for query in parameters['queries']],
        parameters.get('datasetIds'),
    )

    query_details_list = get_query_details_list(parameters['queries'])
    page_details = {
        'page': parameters.get('page', 1),
        'page_size': parameters.get('pageSize'),
        'sortby': parameters.get('variantsSortby', 'pos'),
        'desc': bool(parameters.get('variantsDescending')),
    }
    include_datasets = parameters.get('includeDatasetResponses', 'NONE')
    iupac = parameters.get('iupac', 'True')
    similar = bool(parameters.get('similar'))
    call_collate_queries(datasets, query_details_list,
                         parameters.get('queryCombination'), page_details,
                         parameters.get('sampleFields'), include_datasets,
                         iupac, similar, responses)
    dataset_responses = []
    exists = False
    for response in responses.collect_responses():
        result = response.result
        if not result or 'exists' not in result:
            # function errored out, ignore
            continue
        if not exists and result['exists']:
            exists = True
            if include_datasets == 'NONE':
                break
        if result.pop('include'):
            dataset_responses.append(result)
    dataset_responses.sort(key=lambda r: r['datasetId'])
    response_dict.update({
        'exists': exists,
        'datasetAlleleResponses': dataset_responses or None,
    })
    response_dict = check_size(response_dict, context)
    return bundle_response(200, response_dict)
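Example 7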
def query_datasets(parameters):
    response_dict = {
        'beaconId': BEACON_ID,
        'apiVersion': None,
        'alleleRequest': parameters,
    }
    validation_error = validate_request(parameters)
    if validation_error:
        return bad_request(validation_error, response_dict)

    datasets = get_datasets(parameters['assemblyId'],
                            parameters.get('datasetIds'))

    reference_name = parameters['referenceName']
    vcf_chromosomes = get_vcf_chromosome_map(datasets, reference_name)
    start = parameters.get('start')
    if start is None:
        region_start = parameters['startMin']
        region_end = parameters['startMax']
        end_min = parameters['endMin']
        end_max = parameters['endMax']
    else:
        region_start = region_end = start
        end = parameters.get('end')
        if end is None:
            end = start
        end_min = end_max = end
    reference_bases = parameters['referenceBases']
    # account for the 1-based indexing of vcf files
    region_start += 1
    region_end += 1
    end_min += 1
    end_max += 1
    if reference_bases != 'N':
        # For specific reference bases region may be smaller
        max_offset = len(reference_bases) - 1
        end_max = min(region_end + max_offset, end_max)
        region_start = max(end_min - max_offset, region_start)
        if end_min > end_max or region_start > region_end:
            # Region search will find nothing, search a dummy region
            region_start = 2000000000
            region_end = region_start
            end_min = region_start + max_offset
            end_max = end_min
    alternate_bases = parameters.get('alternateBases')
    variant_type = parameters.get('variantType')
    include_datasets = parameters.get('includeDatasetResponses', 'NONE')
    responses = queue.Queue()
    threads = []
    for dataset in datasets:
        dataset_id = dataset['id']['S']
        vcf_locations = {vcf: vcf_chromosomes[vcf]
                         for vcf in dataset['vcfLocations']['SS']
                         if vcf_chromosomes[vcf]}
        t = threading.Thread(target=perform_query,
                             kwargs={
                                 'dataset_id': dataset_id,
                                 'vcf_locations': vcf_locations,
                                 'reference_bases': reference_bases,
                                 'region_start': region_start,
                                 'region_end': region_end,
                                 'end_min': end_min,
                                 'end_max': end_max,
                                 'alternate_bases': alternate_bases,
                                 'variant_type': variant_type,
                                 'include_datasets': include_datasets,
                                 'responses': responses,
                             })
        t.start()
        threads.append(t)
    num_threads = len(threads)
    processed = 0
    dataset_responses = []
    exists = False
    while processed < num_threads and (include_datasets != 'NONE'
                                       or not exists):
        response = responses.get()
        processed += 1
        if 'exists' not in response:
            # function errored out, ignore
            continue
        exists = exists or response['exists']
        if response.pop('include'):
            dataset_responses.append(response)
    response_dict.update({
        'exists': exists,
        'datasetAlleleResponses': dataset_responses or None,
    })
    return bundle_response(200, response_dict)
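
The coordinate handling above first converts the Beacon API's 0-based positions into the 1-based positions used by VCF, then, when concrete reference bases are supplied, narrows the window that can possibly match. A short trace with made-up values:

# GET request with start=1000, referenceBases='AT' and no explicit 'end'
start = 1000
reference_bases = 'AT'
region_start = region_end = start                       # 1000
end_min = end_max = start                               # 'end' defaults to 'start'
# shift to 1-based VCF coordinates
region_start += 1; region_end += 1                      # 1001
end_min += 1; end_max += 1                              # 1001
max_offset = len(reference_bases) - 1                   # 1
end_max = min(region_end + max_offset, end_max)         # 1001
region_start = max(end_min - max_offset, region_start)  # 1001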