Example no. 1
0
def update_xbrowse_vcfffiles(project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record):
    """Create or update the legacy xbrowse VCFFile record for a loaded variant
    dataset and attach it to the matching base-model individuals.

    Args:
        project: seqr Project whose matching xbrowse base project is updated
        sample_type (string): sample type of the dataset (e.g. "WES", "WGS")
        elasticsearch_index (string): elasticsearch index the dataset was loaded into
        dataset_path (string): source path of the VCF file
        matched_sample_id_to_sample_record (dict): maps sample id to its seqr Sample record
    """
    base_project = find_matching_xbrowse_model(project)
    # Invalidate cached search results before the project's datasets change.
    get_datastore(base_project).bust_project_cache(base_project.project_id)
    clear_project_results_cache(base_project.project_id)

    # Reuse the most recent matching VCFFile record if one already exists.
    vcf_file = VCFFile.objects.filter(
        project=base_project,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_type=sample_type,
        elasticsearch_index=elasticsearch_index).order_by('-pk').first()

    if not vcf_file:
        vcf_file = VCFFile.objects.create(
            project=base_project,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            sample_type=sample_type,
            elasticsearch_index=elasticsearch_index,
        )
        logging.info("Created vcf file: " + str(vcf_file.__dict__))

    vcf_file.file_path = dataset_path
    # dict.values() returns a non-indexable view on Python 3, so `.values()[0]`
    # raises TypeError; take an arbitrary record's loaded_date via next(iter(...)).
    vcf_file.loaded_date = next(iter(matched_sample_id_to_sample_record.values())).loaded_date
    vcf_file.save()

    # Link the VCFFile to every base individual backing the loaded samples.
    base_individuals = BaseIndividual.objects.filter(
        seqr_individual_id__in=[s.individual_id for s in matched_sample_id_to_sample_record.values()]
    )
    for base_indiv in base_individuals:
        base_indiv.vcf_files.add(vcf_file)
Example no. 2
0
def _deprecated_update_vcfffiles(project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record):
    """DEPRECATED. Create or update the legacy VCFFile record for a loaded
    variant dataset and attach it to the matching base-model individuals.

    Args:
        project: seqr Project whose xbrowse BaseProject is updated
        sample_type (string): sample type of the dataset (e.g. "WES", "WGS")
        elasticsearch_index (string): elasticsearch index the dataset was loaded into
        dataset_path (string): source path of the VCF file
        matched_sample_id_to_sample_record (dict): maps sample id to its seqr Sample record
    """
    base_project = BaseProject.objects.get(seqr_project=project)
    # Invalidate cached search results before the project's datasets change.
    get_datastore(base_project).bust_project_cache(base_project.project_id)
    clear_project_results_cache(base_project.project_id)

    # Reuse the most recent matching VCFFile record if one already exists.
    vcf_file = VCFFile.objects.filter(
        project=base_project,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_type=sample_type,
        elasticsearch_index=elasticsearch_index).order_by('-pk').first()

    if not vcf_file:
        vcf_file = VCFFile.objects.create(
            project=base_project,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            sample_type=sample_type,
            elasticsearch_index=elasticsearch_index,
        )
        logger.info("Created vcf file: " + str(vcf_file.__dict__))

    vcf_file.file_path = dataset_path
    # dict.values() returns a non-indexable view on Python 3, so `.values()[0]`
    # raises TypeError; take an arbitrary record's loaded_date via next(iter(...)).
    vcf_file.loaded_date = next(iter(matched_sample_id_to_sample_record.values())).loaded_date
    vcf_file.save()

    # Link the VCFFile to every base individual backing the loaded samples.
    for indiv in [s.individual for s in matched_sample_id_to_sample_record.values()]:
        for base_indiv in BaseIndividual.objects.filter(seqr_individual=indiv).only('id'):
            base_indiv.vcf_files.add(vcf_file)
Example no. 3
0
def add_dataset_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
        }

        Response body - will contain the following structure:

    """

    logger.info("add_dataset_handler: " + str(request))

    # Only users with edit permission on the project may add datasets.
    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    logger.info("add_dataset_handler: received %s" % pformat(request_json))

    required_fields = ['sampleType', 'datasetType']
    if any(field not in request_json for field in required_fields):
        raise ValueError("request must contain fields: {}".format(
            ', '.join(required_fields)))

    sample_type = request_json['sampleType']
    dataset_type = request_json['datasetType']
    # Optional string fields are whitespace-stripped when present.
    elasticsearch_index = request_json.get('elasticsearchIndex')
    if elasticsearch_index:
        elasticsearch_index = elasticsearch_index.strip()
    dataset_path = request_json.get('datasetPath')
    if dataset_path:
        dataset_path = dataset_path.strip()
    dataset_name = request_json.get('datasetName')
    if dataset_name:
        dataset_name = dataset_name.strip()

    ignore_extra_samples_in_callset = request_json.get(
        'ignoreExtraSamplesInCallset')
    ignore_missing_family_members = request_json.get(
        'ignoreMissingFamilyMembers')
    mapping_file_id = request_json.get('mappingFile', {}).get('uploadedFileId')
    mapping_file_path = request_json.get('mappingFilePath')

    try:
        updated_samples, created_sample_ids = add_dataset(
            project=project,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            dataset_path=dataset_path,
            dataset_name=dataset_name,
            max_edit_distance=0,
            ignore_extra_samples_in_callset=ignore_extra_samples_in_callset,
            ignore_missing_family_members=ignore_missing_family_members,
            mapping_file_path=mapping_file_path,
            mapping_file_id=mapping_file_id,
        )

        # update VCFFile records
        if updated_samples:
            if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS:
                base_project = BaseProject.objects.get(seqr_project=project)
                # Invalidate cached search results before the datasets change.
                get_datastore(base_project).bust_project_cache(
                    base_project.project_id)
                clear_project_results_cache(base_project.project_id)

                # Reuse the most recent matching VCFFile record if one exists.
                vcf_file = VCFFile.objects.filter(
                    project=base_project,
                    dataset_type=dataset_type,
                    sample_type=sample_type,
                    elasticsearch_index=elasticsearch_index).order_by(
                        '-pk').first()

                if not vcf_file:
                    vcf_file = VCFFile.objects.create(
                        project=base_project,
                        dataset_type=dataset_type,
                        sample_type=sample_type,
                        elasticsearch_index=elasticsearch_index,
                    )
                    logger.info("Created vcf file: " + str(vcf_file.__dict__))

                vcf_file.file_path = dataset_path or "{}.vcf.gz".format(
                    elasticsearch_index
                )  # legacy VCFFile model requires non-empty vcf path
                # `iterator.next()` is Python 2-only (removed in Python 3);
                # use the builtin next() to take an arbitrary sample's loaded_date.
                vcf_file.loaded_date = next(iter(updated_samples)).loaded_date
                vcf_file.save()

                # Link the VCFFile to every base individual backing the samples.
                for indiv in [s.individual for s in updated_samples]:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=indiv).only('id'):
                        base_indiv.vcf_files.add(vcf_file)

            elif dataset_type == Sample.DATASET_TYPE_READ_ALIGNMENTS:
                # Alignment datasets only record the bam path on the individual.
                for sample in updated_samples:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=sample.individual).only('id'):
                        base_indiv.bam_file_path = sample.dataset_file_path
                        base_indiv.save()

        updated_sample_json = get_json_for_samples(updated_samples,
                                                   project_guid=project_guid)
        response = {
            'samplesByGuid': {s['sampleGuid']: s
                              for s in updated_sample_json}
        }
        # Only individuals that gained a newly-created sample need refreshing.
        updated_individuals = {
            s['individualGuid']
            for s in updated_sample_json if s['sampleId'] in created_sample_ids
        }
        if updated_individuals:
            individuals = Individual.objects.filter(
                guid__in=updated_individuals).prefetch_related(
                    'sample_set', 'family').only('guid')
            response['individualsByGuid'] = {
                ind.guid: {
                    'sampleGuids':
                    [s.guid for s in ind.sample_set.only('guid').all()]
                }
                for ind in individuals
            }

            # New data arrived: move waiting families into "analysis in progress".
            for ind in individuals:
                family = ind.family
                if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA:
                    update_seqr_model(family,
                                      analysis_status=Family.
                                      ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

        return create_json_response(response)
    except Exception as e:
        traceback.print_exc()
        # `e.message` was removed in Python 3; accessing it here would raise
        # AttributeError and mask the original error. str(e) is always valid.
        return create_json_response({'errors': [str(e)]},
                                    status=400)