def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    extra_params = {
        'beaconId': BEACON_ID,
    }
    if event['httpMethod'] == 'POST':
        event_body = event.get('body')
        if not event_body:
            return bad_request('No body sent with request.', extra_params)
        try:
            parameters = json.loads(event_body)
        except ValueError:
            return bad_request('Error parsing request body, Expected JSON.',
                               extra_params)
    else:  # method == 'GET'
        parameters = event['queryStringParameters']
        if not parameters:
            return bad_request('No query parameters sent with request.',
                               extra_params)
        multi_values = event['multiValueQueryStringParameters']
        parameters['datasetIds'] = multi_values.get('datasetIds')
        for int_field in ('start', 'end', 'startMin', 'startMax',
                          'endMin', 'endMax'):
            if int_field in parameters:
                try:
                    parameters[int_field] = int(parameters[int_field])
                except ValueError:
                    # Cannot be formatted as an integer, handle in validation
                    pass
    return query_datasets(parameters)

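# Not part of the original listing: a minimal sketch of the response helpers
# used throughout these handlers, inferred from how they are called (API
# Gateway Lambda proxy responses). The exact field names, headers and the
# optional extra_params argument are assumptions, not the project's code.
def bundle_response(status_code, body):
    # Wrap a payload in the shape API Gateway expects from a proxy integration.
    return {
        'statusCode': status_code,
        'headers': {'Content-Type': 'application/json'},
        'body': json.dumps(body),
    }


def bad_request(error_message, extra_params=None):
    # 400 response carrying the error message, merged with any extra fields
    # (e.g. the beaconId supplied by the query handlers).
    body = {'message': error_message}
    if extra_params:
        body.update(extra_params)
    return bundle_response(400, body)
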
def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    extra_params = {
        'beaconId': BEACON_ID,
    }
    if event['httpMethod'] == 'POST':
        event_body = event.get('body')
        if not event_body:
            return bad_request('No body sent with request.', extra_params)
        try:
            parameters = json.loads(event_body)
        except ValueError:
            return bad_request('Error parsing request body, Expected JSON.',
                               extra_params)
        parameters['queries'] = get_queries(parameters)
    else:  # method == 'GET'
        parameters = event['queryStringParameters']
        if not parameters:
            return bad_request('No query parameters sent with request.',
                               extra_params)
        multi_values = event['multiValueQueryStringParameters']
        parameters['queries'] = get_queries(multi_values)
        parameters['datasetIds'] = multi_values.get('datasetIds')
        parameters['sampleFields'] = multi_values.get('sampleFields')
        for int_field in ('page', 'pageSize', 'variantsDescending', 'similar'):
            if int_field in parameters:
                parameters[int_field] = int_or_self(parameters[int_field])
    return query_datasets(parameters, context)

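# Hypothetical sketch of int_or_self(), which is called above but not included
# in this listing. It is assumed to coerce a query-string value to int when
# possible and otherwise return it unchanged so validation can report it.
def int_or_self(value):
    try:
        return int(value)
    except (TypeError, ValueError):
        return value
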
def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    event_body = event.get('body')
    if not event_body:
        return bad_request('No body sent with request.')
    try:
        body_dict = json.loads(event_body)
    except ValueError:
        return bad_request('Error parsing request body, Expected JSON.')
    method = event['httpMethod']
    return submit_dataset(body_dict, method)

def lambda_handler(event, context):
    print('Event Received: {}'.format(json.dumps(event)))
    # Budget for this invocation: the remaining Lambda time minus a safety
    # margin, after which any unprocessed regions are handed off via SNS.
    time_assigned = context.get_remaining_time_in_millis() - MILLISECONDS_BEFORE_SPLIT
    print("time assigned", time_assigned)
    timeStart = time.time()
    event_body = event.get('body')
    if not event_body:
        return bad_request('No body sent with request.')
    try:
        body_dict = json.loads(event_body)
        requestID = event['requestContext']['requestId']
        location = body_dict['location']
        vcf_regions = get_translated_regions(location)
    except ValueError:
        return bad_request('Error parsing request body, Expected JSON.')
    batchID = ''
    print(vcf_regions)
    for index, region in enumerate(vcf_regions):
        if (time.time() - timeStart) * 1000 > time_assigned:
            # Out of time: publish the remaining regions to this function's own
            # SNS topic so a fresh invocation can continue the work.
            newRegions = vcf_regions[index:]
            batchID = ''
            print("New Regions ", newRegions)
            kwargs = {
                'TopicArn': QUERY_VCF_EXTENDED_SNS_TOPIC_ARN,
                'Message': json.dumps({
                    'regions': newRegions,
                    'requestID': requestID,
                    'location': location,
                }),
            }
            print('Publishing to SNS: {}'.format(json.dumps(kwargs)))
            response = sns.publish(**kwargs)
            print('Received Response: {}'.format(json.dumps(response)))
            break
        else:
            # Translate the 'chrom:startMbp' region string into 1-based
            # coordinates covering one slice of SLICE_SIZE_MBP megabases.
            chrom, start_str = region.split(':')
            regionID = chrom + "_" + start_str
            start = round(1000000 * float(start_str) + 1)
            end = start + round(1000000 * SLICE_SIZE_MBP - 1)
            all_coords, all_changes = get_regions_and_variants(
                location, chrom, start, end, time_assigned)
            # Break the records into batches of RECORDS_PER_SAMPLE for querying.
            total_coords = [all_coords[x:x + RECORDS_PER_SAMPLE]
                            for x in range(0, len(all_coords), RECORDS_PER_SAMPLE)]
            total_changes = [all_changes[x:x + RECORDS_PER_SAMPLE]
                             for x in range(0, len(all_changes), RECORDS_PER_SAMPLE)]
            batchID = submitQueryGTF(total_coords, total_changes, requestID, regionID)
    if batchID != '':
        # All regions were processed in this invocation; trigger concatenation.
        print("sending for concat")
        kwargs = {
            'TopicArn': CONCAT_SNS_TOPIC_ARN,
            'Message': json.dumps({'APIid': requestID, 'lastBatchID': batchID}),
        }
        print('Publishing to SNS: {}'.format(json.dumps(kwargs)))
        response = sns.publish(**kwargs)
        print('Received Response: {}'.format(json.dumps(response)))
    return bundle_response(200, "Process started")

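# Worked example of the slice arithmetic above (SLICE_SIZE_MBP = 5 is assumed
# here purely for illustration): the region string '1:10.5' splits into
# chrom = '1' and start_str = '10.5', giving start = round(1000000 * 10.5 + 1)
# = 10500001 and end = start + round(1000000 * 5 - 1) = 15500000, i.e. one
# 5 Mbp slice of chromosome 1 in 1-based, inclusive coordinates.
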
def submit_dataset(body_dict, method):
    new = method == 'POST'
    validation_error = validate_request(body_dict, new)
    if validation_error:
        return bad_request(validation_error)
    if 'vcfLocations' in body_dict:
        errors = check_vcf_locations(body_dict['vcfLocations'])
        if errors:
            return bad_request(errors)
        summarise = True
    else:
        summarise = False
    if new:
        create_dataset(body_dict)
    else:
        update_dataset(body_dict)
    if summarise:
        summarise_dataset(body_dict['id'])
    return bundle_response(200, {})

def query_datasets(parameters, context):
    response_dict = {
        'beaconId': BEACON_ID,
        'apiVersion': None,
        'alleleRequest': parameters,
    }
    validation_error = validate_request(parameters)
    if validation_error:
        return bad_request(validation_error, response_dict)
    responses = Caches(
        dynamodb_client=dynamodb,
        lambda_client=aws_lambda,
        s3_client=s3,
    )
    datasets = get_datasets(
        parameters['assemblyId'],
        [query['referenceName'] for query in parameters['queries']],
        parameters.get('datasetIds'),
    )
    query_details_list = get_query_details_list(parameters['queries'])
    page_details = {
        'page': parameters.get('page', 1),
        'page_size': parameters.get('pageSize'),
        'sortby': parameters.get('variantsSortby', 'pos'),
        'desc': bool(parameters.get('variantsDescending')),
    }
    include_datasets = parameters.get('includeDatasetResponses', 'NONE')
    iupac = parameters.get('iupac', 'True')
    similar = bool(parameters.get('similar'))
    call_collate_queries(datasets, query_details_list,
                         parameters.get('queryCombination'), page_details,
                         parameters.get('sampleFields'), include_datasets,
                         iupac, similar, responses)
    dataset_responses = []
    exists = False
    for response in responses.collect_responses():
        result = response.result
        if not result or 'exists' not in result:
            # function errored out, ignore
            continue
        if not exists and result['exists']:
            exists = True
            if include_datasets == 'NONE':
                # No per-dataset responses requested, so one hit is enough.
                break
        if result.pop('include'):
            dataset_responses.append(result)
    dataset_responses.sort(key=lambda r: r['datasetId'])
    response_dict.update({
        'exists': exists,
        'datasetAlleleResponses': dataset_responses or None,
    })
    response_dict = check_size(response_dict, context)
    return bundle_response(200, response_dict)

def query_datasets(parameters):
    response_dict = {
        'beaconId': BEACON_ID,
        'apiVersion': None,
        'alleleRequest': parameters,
    }
    validation_error = validate_request(parameters)
    if validation_error:
        return bad_request(validation_error, response_dict)
    datasets = get_datasets(parameters['assemblyId'], parameters.get('datasetIds'))
    reference_name = parameters['referenceName']
    vcf_chromosomes = get_vcf_chromosome_map(datasets, reference_name)
    start = parameters.get('start')
    if start is None:
        region_start = parameters['startMin']
        region_end = parameters['startMax']
        end_min = parameters['endMin']
        end_max = parameters['endMax']
    else:
        region_start = region_end = start
        end = parameters.get('end')
        if end is None:
            end = start
        end_min = end_max = end
    reference_bases = parameters['referenceBases']
    # account for the 1-based indexing of vcf files
    region_start += 1
    region_end += 1
    end_min += 1
    end_max += 1
    if reference_bases != 'N':
        # For specific reference bases region may be smaller
        max_offset = len(reference_bases) - 1
        end_max = min(region_end + max_offset, end_max)
        region_start = max(end_min - max_offset, region_start)
        if end_min > end_max or region_start > region_end:
            # Region search will find nothing, search a dummy region
            region_start = 2000000000
            region_end = region_start
            end_min = region_start + max_offset
            end_max = end_min
    alternate_bases = parameters.get('alternateBases')
    variant_type = parameters.get('variantType')
    include_datasets = parameters.get('includeDatasetResponses', 'NONE')
    responses = queue.Queue()
    threads = []
    for dataset in datasets:
        dataset_id = dataset['id']['S']
        vcf_locations = {vcf: vcf_chromosomes[vcf]
                         for vcf in dataset['vcfLocations']['SS']
                         if vcf_chromosomes[vcf]}
        t = threading.Thread(target=perform_query, kwargs={
            'dataset_id': dataset_id,
            'vcf_locations': vcf_locations,
            'reference_bases': reference_bases,
            'region_start': region_start,
            'region_end': region_end,
            'end_min': end_min,
            'end_max': end_max,
            'alternate_bases': alternate_bases,
            'variant_type': variant_type,
            'include_datasets': include_datasets,
            'responses': responses,
        })
        t.start()
        threads.append(t)
    num_threads = len(threads)
    processed = 0
    dataset_responses = []
    exists = False
    while processed < num_threads and (include_datasets != 'NONE' or not exists):
        response = responses.get()
        processed += 1
        if 'exists' not in response:
            # function errored out, ignore
            continue
        exists = exists or response['exists']
        if response.pop('include'):
            dataset_responses.append(response)
    response_dict.update({
        'exists': exists,
        'datasetAlleleResponses': dataset_responses or None,
    })
    return bundle_response(200, response_dict)
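

# Not part of the original listing: an illustrative sketch of the per-dataset
# result that each perform_query worker is assumed to put on the responses
# queue, inferred from how the consuming loops above read it. Any additional
# fields in the real implementation are unknown here.
EXAMPLE_DATASET_RESPONSE = {
    'datasetId': 'dataset-1',  # used to sort datasetAlleleResponses
    'exists': True,            # True if the dataset holds a matching variant
    'include': True,           # popped by the caller; controls whether this
                               # entry is returned in datasetAlleleResponses
}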