def add_variants_dataset_handler(request, project_guid):
    """Create or update samples for the given variant dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFilePath':  <String>
        }

        Response body - will contain the following structure:
        On success: the json built by _get_samples_json for the matched samples, plus
        {
            'familiesByGuid': {<family guid>: {'analysisStatus': <String>}, ...}
        }
        listing the families moved from "waiting for data" to "analysis in progress".
        If no samples were matched: {'samplesByGuid': {}}
        On any validation or loading failure (HTTP 400): {'errors': [<String>]}
    """
    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)

    try:
        # Parsed inside the try block so a malformed body is reported as a 400 like every other input problem.
        request_json = json.loads(request.body)

        if 'elasticsearchIndex' not in request_json:
            raise ValueError('"elasticsearchIndex" is required')
        elasticsearch_index = request_json['elasticsearchIndex'].strip()

        sample_ids, index_metadata = get_elasticsearch_index_samples(elasticsearch_index)
        validate_index_metadata(index_metadata, project, elasticsearch_index)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        mapping_file_path = request_json.get('mappingFilePath')
        sample_id_to_individual_id_mapping = load_mapping_file(mapping_file_path) if mapping_file_path else {}

        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
        )

        unmatched_samples = set(sample_ids) - set(matched_sample_id_to_sample_record.keys())

        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one sample has to match.
            if not matched_sample_id_to_sample_record:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)".format(
                        len(sample_ids)
                    ))
        elif unmatched_samples:
            # Sorted so the message is stable regardless of set iteration order.
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'.format(
                    ", ".join(sorted(unmatched_samples))))

        # Group the individual ids present in this callset by family.
        included_family_individuals = defaultdict(set)
        for sample in matched_sample_id_to_sample_record.values():
            included_family_individuals[sample.individual.family].add(sample.individual.individual_id)

        # Family members that already have loaded variant-call samples but are absent from this callset.
        missing_family_individuals = []
        for family, individual_ids in included_family_individuals.items():
            missing_indivs = family.individual_set.filter(
                sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
                sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
            ).exclude(individual_id__in=individual_ids)
            if missing_indivs:
                missing_family_individuals.append(
                    '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in missing_indivs]))
                )
        if missing_family_individuals:
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'.format(
                    ', '.join(missing_family_individuals)
                ))

        _update_samples(
            matched_sample_id_to_sample_record, elasticsearch_index=elasticsearch_index, dataset_path=dataset_path
        )

    except Exception as e:
        traceback.print_exc()
        # Exceptions only carry a `message` attribute on Python 2; fall back to str(e) everywhere else.
        return create_json_response({'errors': [getattr(e, 'message', None) or str(e)]}, status=400)

    if not matched_sample_id_to_sample_record:
        return create_json_response({'samplesByGuid': {}})

    update_project_from_json(project, {'has_new_search': True})
    reset_cached_search_results(project)

    update_xbrowse_vcfffiles(
        project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record
    )

    # Families that were waiting for data now have some, so advance their analysis status.
    families_to_update = [
        family for family in included_family_individuals
        if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA
    ]
    for family in families_to_update:
        update_model_from_json(family, {'analysis_status': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS})

    response_json = _get_samples_json(matched_sample_id_to_sample_record, project_guid)
    response_json['familiesByGuid'] = {
        family.guid: {'analysisStatus': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS}
        for family in families_to_update
    }

    return create_json_response(response_json)
def add_variants_dataset_handler(request, project_guid):
    """Create or update samples for the given variant dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'datasetType': <String> (required, must be a key of Sample.DATASET_TYPE_LOOKUP)
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFilePath':  <String>
        }

        Response body - will contain the following structure:
        On success: the json built by _get_samples_json for the matched and inactivated samples, plus
        {
            'familiesByGuid': {<family guid>: {'analysisStatus': <String>}, ...}
        }
        listing the families moved from "waiting for data" to "analysis in progress".
        If no samples were matched: {'samplesByGuid': {}}
        On any validation or loading failure (HTTP 400): {'errors': [<String>]}
    """
    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)

    try:
        # Parsed inside the try block so a malformed body is reported as a 400 like every other input problem.
        request_json = json.loads(request.body)

        required_fields = ['elasticsearchIndex', 'datasetType']
        if any(field not in request_json for field in required_fields):
            raise ValueError('request must contain fields: {}'.format(', '.join(required_fields)))
        elasticsearch_index = request_json['elasticsearchIndex'].strip()
        dataset_type = request_json['datasetType']
        if dataset_type not in Sample.DATASET_TYPE_LOOKUP:
            raise ValueError('Invalid dataset type "{}"'.format(dataset_type))

        sample_ids, index_metadata = get_elasticsearch_index_samples(elasticsearch_index, dataset_type=dataset_type)
        if not sample_ids:
            raise ValueError('No samples found in the index. Make sure the specified caller type is correct')
        validate_index_metadata(index_metadata, project, elasticsearch_index, dataset_type=dataset_type)
        sample_type = index_metadata['sampleType']

        mapping_file_path = request_json.get('mappingFilePath')
        sample_id_to_individual_id_mapping = load_mapping_file(mapping_file_path) if mapping_file_path else {}

        # One timestamp is shared by the matching step and the sample update below.
        loaded_date = timezone.now()
        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
            loaded_date=loaded_date,
        )

        unmatched_samples = set(sample_ids) - set(matched_sample_id_to_sample_record.keys())

        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one sample has to match.
            if not matched_sample_id_to_sample_record:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)".format(
                        len(sample_ids)
                    ))
        elif unmatched_samples:
            # Sorted so the message is stable regardless of set iteration order.
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'.format(
                    ", ".join(sorted(unmatched_samples))))

        # Materialize once: dict.values() is a view on Python 3, and the records are used three times below.
        matched_samples = list(matched_sample_id_to_sample_record.values())
        prefetch_related_objects(matched_samples, 'individual__family')
        included_families = {sample.individual.family for sample in matched_samples}

        # Individuals in the included families with an active sample of this dataset type
        # that is not one of the samples matched in this callset.
        missing_individuals = Individual.objects.filter(
            family__in=included_families,
            sample__is_active=True,
            sample__dataset_type=dataset_type,
        ).exclude(sample__in=matched_samples).select_related('family')
        missing_family_individuals = defaultdict(list)
        for individual in missing_individuals:
            missing_family_individuals[individual.family].append(individual)

        if missing_family_individuals:
            missing_family_summaries = sorted(
                '{} ({})'.format(
                    family.family_id, ', '.join(sorted([i.individual_id for i in missing_indivs]))
                ) for family, missing_indivs in missing_family_individuals.items()
            )
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'.format(
                    ', '.join(missing_family_summaries)
                ))

        inactivate_sample_guids = _update_variant_samples(
            matched_sample_id_to_sample_record, elasticsearch_index, loaded_date, dataset_type
        )

    except Exception as e:
        traceback.print_exc()
        # Exceptions only carry a `message` attribute on Python 2; fall back to str(e) everywhere else.
        return create_json_response({'errors': [getattr(e, 'message', None) or str(e)]}, status=400)

    if not matched_sample_id_to_sample_record:
        return create_json_response({'samplesByGuid': {}})

    # Families that were waiting for data now have some, so advance their analysis status in one query.
    family_guids_to_update = [
        family.guid for family in included_families
        if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA
    ]
    Family.objects.filter(guid__in=family_guids_to_update).update(
        analysis_status=Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

    response_json = _get_samples_json(matched_sample_id_to_sample_record, inactivate_sample_guids, project_guid)
    response_json['familiesByGuid'] = {
        family_guid: {'analysisStatus': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS}
        for family_guid in family_guids_to_update
    }

    return create_json_response(response_json)
def add_variants_dataset_handler(request, project_guid):
    """Create or update samples for the given variant dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'ignoreMissingFamilyMembers': <Boolean>
            'mappingFilePath':  <String>
        }

        Response body - will contain the following structure:
        On success: the json built by _get_samples_json for the matched samples.
        If no samples were matched: {'samplesByGuid': {}}
        On any validation or loading failure (HTTP 400): {'errors': [<String>]}
    """
    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)

    try:
        # Parsed inside the try block so a malformed body is reported as a 400 like every other input problem.
        request_json = json.loads(request.body)

        if 'elasticsearchIndex' not in request_json:
            raise ValueError('"elasticsearchIndex" is required')
        elasticsearch_index = request_json['elasticsearchIndex'].strip()

        sample_ids, index_metadata = get_elasticsearch_index_samples(elasticsearch_index)
        validate_index_metadata(index_metadata, project, elasticsearch_index)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        mapping_file_path = request_json.get('mappingFilePath')
        sample_id_to_individual_id_mapping = load_mapping_file(mapping_file_path) if mapping_file_path else {}

        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
        )

        unmatched_samples = set(sample_ids) - set(matched_sample_id_to_sample_record.keys())

        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one sample has to match.
            if not matched_sample_id_to_sample_record:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)".format(
                        len(sample_ids)
                    ))
        elif unmatched_samples:
            # Sorted so the message is stable regardless of set iteration order.
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'.format(
                    ", ".join(sorted(unmatched_samples))))

        if not request_json.get('ignoreMissingFamilyMembers'):
            # Group the individual ids present in this callset by family.
            included_family_individuals = defaultdict(set)
            for sample in matched_sample_id_to_sample_record.values():
                included_family_individuals[sample.individual.family].add(sample.individual.individual_id)

            # Family members that already have loaded variant-call samples but are absent from this callset.
            missing_family_individuals = []
            for family, individual_ids in included_family_individuals.items():
                missing_indivs = family.individual_set.filter(
                    sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
                    sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
                ).exclude(individual_id__in=individual_ids)
                if missing_indivs:
                    missing_family_individuals.append(
                        '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in missing_indivs]))
                    )
            if missing_family_individuals:
                raise Exception(
                    'The following families are included in the callset but are missing some family members: {}. This can lead to errors in variant search. If you still want to upload this callset, select the "Ignore missing family members" checkbox.'.format(
                        ', '.join(missing_family_individuals)
                    ))

        _update_samples(
            matched_sample_id_to_sample_record, elasticsearch_index=elasticsearch_index, dataset_path=dataset_path
        )

    except Exception as e:
        traceback.print_exc()
        # Exceptions only carry a `message` attribute on Python 2; fall back to str(e) everywhere else.
        return create_json_response({'errors': [getattr(e, 'message', None) or str(e)]}, status=400)

    if not matched_sample_id_to_sample_record:
        return create_json_response({'samplesByGuid': {}})

    update_project_from_json(project, {'has_new_search': True})
    reset_cached_search_results(project)

    _deprecated_update_vcfffiles(
        project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record
    )

    return create_json_response(_get_samples_json(matched_sample_id_to_sample_record, project_guid))