Example #1
0
def add_variants_dataset_handler(request, project_guid):
    """Create or update samples for the given variant dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFilePath':  <String>
        }

        Response body - will contain the following structure:
        On success, the output of _get_samples_json for the matched samples,
        extended with:
        {
            'familiesByGuid': {<family guid>: {'analysisStatus': <String>}}
        }
        If no sample records were matched, {'samplesByGuid': {}} is returned
        and nothing else is updated.
        If any step of validation/matching/updating raises, the response has
        status 400 and the structure:
        {
            'errors': [<String>]
        }

    """

    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                permission_level=CAN_EDIT)
    request_json = json.loads(request.body)

    try:
        if 'elasticsearchIndex' not in request_json:
            raise ValueError('"elasticsearchIndex" is required')
        elasticsearch_index = request_json['elasticsearchIndex'].strip()

        sample_ids, index_metadata = get_elasticsearch_index_samples(
            elasticsearch_index)
        validate_index_metadata(index_metadata, project, elasticsearch_index)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        # The mapping file is optional; without it no id remapping is passed on.
        sample_id_to_individual_id_mapping = {}
        if request_json.get('mappingFilePath'):
            sample_id_to_individual_id_mapping = load_mapping_file(
                request_json['mappingFilePath'])

        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping=
            sample_id_to_individual_id_mapping,
        )

        unmatched_samples = set(sample_ids) - set(
            matched_sample_id_to_sample_record)

        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one must have matched.
            if not matched_sample_id_to_sample_record:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)"
                    .format(len(sample_ids)))
        elif unmatched_samples:
            # Sort so the reported ids come out in a stable order rather than
            # in arbitrary set-iteration order.
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'
                .format(", ".join(sorted(unmatched_samples))))

        # Group the individual ids present in this callset by their family.
        included_family_individuals = defaultdict(set)
        for sample in matched_sample_id_to_sample_record.values():
            included_family_individuals[sample.individual.family].add(
                sample.individual.individual_id)

        # For every family touched by the callset, look for individuals that
        # already have a loaded variant-calls sample but are not in the callset.
        missing_family_individuals = []
        for family, individual_ids in included_family_individuals.items():
            missing_indivs = family.individual_set.filter(
                sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
                sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
            ).exclude(individual_id__in=individual_ids)
            if missing_indivs:
                missing_family_individuals.append('{} ({})'.format(
                    family.family_id,
                    ', '.join([i.individual_id for i in missing_indivs])))
        if missing_family_individuals:
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'
                .format(', '.join(missing_family_individuals)))

        _update_samples(matched_sample_id_to_sample_record,
                        elasticsearch_index=elasticsearch_index,
                        dataset_path=dataset_path)

    except Exception as e:
        traceback.print_exc()
        # ``Exception.message`` only exists on Python 2. Reading it directly on
        # Python 3 raises AttributeError inside this handler and turns the
        # intended 400 response into an unhandled error.
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]}, status=400)

    if not matched_sample_id_to_sample_record:
        return create_json_response({'samplesByGuid': {}})

    update_project_from_json(project, {'has_new_search': True})
    reset_cached_search_results(project)

    update_xbrowse_vcfffiles(project, sample_type, elasticsearch_index,
                             dataset_path, matched_sample_id_to_sample_record)

    # Families that were waiting for data are moved to "analysis in progress".
    families_to_update = [
        family for family in included_family_individuals
        if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA
    ]
    for family in families_to_update:
        update_model_from_json(
            family,
            {'analysis_status': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS})

    response_json = _get_samples_json(matched_sample_id_to_sample_record,
                                      project_guid)
    response_json['familiesByGuid'] = {
        family.guid: {
            'analysisStatus': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS
        }
        for family in families_to_update
    }
    return create_json_response(response_json)
Example #2
0
def add_variants_dataset_handler(request, project_guid):
    """Create or update samples for the given variant dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'datasetType': <String> (required, must be a key of Sample.DATASET_TYPE_LOOKUP)
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFilePath':  <String>
        }

        Response body - will contain the following structure:
        On success, the output of _get_samples_json for the matched and
        inactivated samples, extended with:
        {
            'familiesByGuid': {<family guid>: {'analysisStatus': <String>}}
        }
        If no sample records were matched, {'samplesByGuid': {}} is returned
        and no family is updated.
        If any step of validation/matching/updating raises, the response has
        status 400 and the structure:
        {
            'errors': [<String>]
        }

    """

    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                permission_level=CAN_EDIT)
    request_json = json.loads(request.body)

    try:
        required_fields = ['elasticsearchIndex', 'datasetType']
        if any(field not in request_json for field in required_fields):
            raise ValueError('request must contain fields: {}'.format(
                ', '.join(required_fields)))
        elasticsearch_index = request_json['elasticsearchIndex'].strip()
        dataset_type = request_json['datasetType']
        if dataset_type not in Sample.DATASET_TYPE_LOOKUP:
            raise ValueError('Invalid dataset type "{}"'.format(dataset_type))

        sample_ids, index_metadata = get_elasticsearch_index_samples(
            elasticsearch_index, dataset_type=dataset_type)
        if not sample_ids:
            raise ValueError(
                'No samples found in the index. Make sure the specified caller type is correct'
            )
        validate_index_metadata(index_metadata,
                                project,
                                elasticsearch_index,
                                dataset_type=dataset_type)
        sample_type = index_metadata['sampleType']

        # The mapping file is optional; without it no id remapping is passed on.
        sample_id_to_individual_id_mapping = {}
        if request_json.get('mappingFilePath'):
            sample_id_to_individual_id_mapping = load_mapping_file(
                request_json['mappingFilePath'])

        # A single timestamp is shared by the matching and the update step.
        loaded_date = timezone.now()
        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping=
            sample_id_to_individual_id_mapping,
            loaded_date=loaded_date,
        )

        unmatched_samples = set(sample_ids) - set(
            matched_sample_id_to_sample_record)

        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one must have matched.
            if not matched_sample_id_to_sample_record:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)"
                    .format(len(sample_ids)))
        elif unmatched_samples:
            # Sort so the reported ids come out in a stable order rather than
            # in arbitrary set-iteration order.
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'
                .format(", ".join(sorted(unmatched_samples))))

        # Materialize the records once: on Python 3 ``dict.values()`` is a
        # non-indexable view, and the same concrete list is reused below.
        matched_samples = list(matched_sample_id_to_sample_record.values())
        prefetch_related_objects(matched_samples, 'individual__family')
        included_families = {
            sample.individual.family for sample in matched_samples
        }

        # Individuals in the affected families that have an active sample of
        # this dataset type which is not one of the matched sample records.
        missing_individuals = Individual.objects.filter(
            family__in=included_families,
            sample__is_active=True,
            sample__dataset_type=dataset_type,
        ).exclude(sample__in=matched_samples).select_related('family')
        missing_family_individuals = defaultdict(list)
        for individual in missing_individuals:
            missing_family_individuals[individual.family].append(individual)

        if missing_family_individuals:
            # One "<family id> (<id>, <id>, ...)" entry per family; both the
            # ids inside an entry and the entries themselves are sorted.
            family_summaries = []
            for family, missing_indivs in missing_family_individuals.items():
                individual_ids = sorted(
                    i.individual_id for i in missing_indivs)
                family_summaries.append('{} ({})'.format(
                    family.family_id, ', '.join(individual_ids)))
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'
                .format(', '.join(sorted(family_summaries))))

        inactivate_sample_guids = _update_variant_samples(
            matched_sample_id_to_sample_record, elasticsearch_index,
            loaded_date, dataset_type)

    except Exception as e:
        traceback.print_exc()
        # ``Exception.message`` only exists on Python 2. Reading it directly on
        # Python 3 raises AttributeError inside this handler and turns the
        # intended 400 response into an unhandled error.
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]}, status=400)

    if not matched_sample_id_to_sample_record:
        return create_json_response({'samplesByGuid': {}})

    # Families that were waiting for data are moved to "analysis in progress"
    # with a single bulk UPDATE.
    family_guids_to_update = [
        family.guid for family in included_families
        if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA
    ]
    Family.objects.filter(guid__in=family_guids_to_update).update(
        analysis_status=Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

    response_json = _get_samples_json(matched_sample_id_to_sample_record,
                                      inactivate_sample_guids, project_guid)
    response_json['familiesByGuid'] = {
        family_guid: {
            'analysisStatus': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS
        }
        for family_guid in family_guids_to_update
    }

    return create_json_response(response_json)
Example #3
0
def add_variants_dataset_handler(request, project_guid):
    """Create or update samples for the given variant dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'ignoreMissingFamilyMembers': <Boolean>
            'mappingFilePath':  <String>
        }

        Response body - will contain the following structure:
        On success, the output of _get_samples_json for the matched samples.
        If no sample records were matched, {'samplesByGuid': {}} is returned
        and nothing else is updated.
        If any step of validation/matching/updating raises, the response has
        status 400 and the structure:
        {
            'errors': [<String>]
        }

    """

    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)
    request_json = json.loads(request.body)

    try:
        if 'elasticsearchIndex' not in request_json:
            raise ValueError('"elasticsearchIndex" is required')
        elasticsearch_index = request_json['elasticsearchIndex'].strip()

        sample_ids, index_metadata = get_elasticsearch_index_samples(elasticsearch_index)
        validate_index_metadata(index_metadata, project, elasticsearch_index)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        # The mapping file is optional; without it no id remapping is passed on.
        sample_id_to_individual_id_mapping = {}
        if request_json.get('mappingFilePath'):
            sample_id_to_individual_id_mapping = load_mapping_file(request_json['mappingFilePath'])

        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
        )

        unmatched_samples = set(sample_ids) - set(matched_sample_id_to_sample_record)

        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one must have matched.
            if not matched_sample_id_to_sample_record:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)".format(
                        len(sample_ids)
                    ))
        elif unmatched_samples:
            # Sort so the reported ids come out in a stable order rather than
            # in arbitrary set-iteration order.
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'.format(
                    ", ".join(sorted(unmatched_samples))))

        # The family-completeness check can be skipped explicitly by the caller.
        if not request_json.get('ignoreMissingFamilyMembers'):
            # Group the individual ids present in this callset by their family.
            included_family_individuals = defaultdict(set)
            for sample in matched_sample_id_to_sample_record.values():
                included_family_individuals[sample.individual.family].add(sample.individual.individual_id)

            # For every family touched by the callset, look for individuals that
            # already have a loaded variant-calls sample but are not in the callset.
            missing_family_individuals = []
            for family, individual_ids in included_family_individuals.items():
                missing_indivs = family.individual_set.filter(
                    sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
                    sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
                ).exclude(individual_id__in=individual_ids)
                if missing_indivs:
                    missing_family_individuals.append(
                        '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in missing_indivs]))
                    )
            if missing_family_individuals:
                raise Exception(
                    'The following families are included in the callset but are missing some family members: {}. This can lead to errors in variant search. If you still want to upload this callset, select the "Ignore missing family members" checkbox.'.format(
                        ', '.join(missing_family_individuals)
                    ))

        _update_samples(
            matched_sample_id_to_sample_record, elasticsearch_index=elasticsearch_index, dataset_path=dataset_path
        )

    except Exception as e:
        traceback.print_exc()
        # ``Exception.message`` only exists on Python 2. Reading it directly on
        # Python 3 raises AttributeError inside this handler and turns the
        # intended 400 response into an unhandled error.
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]}, status=400)

    if not matched_sample_id_to_sample_record:
        return create_json_response({'samplesByGuid': {}})

    update_project_from_json(project, {'has_new_search': True})
    reset_cached_search_results(project)

    _deprecated_update_vcfffiles(
        project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record
    )

    return create_json_response(_get_samples_json(matched_sample_id_to_sample_record, project_guid))