Ejemplo n.º 1
0
def _get_samples_json(matched_sample_id_to_sample_record, project_guid):
    """Build the JSON response describing the given sample records.

    Args:
        matched_sample_id_to_sample_record (dict): maps sample ids to sample records. Only the
            values are used; they are serialized with get_json_for_samples.
        project_guid (string): passed through to get_json_for_samples.

    Returns:
        dict: always contains 'samplesByGuid' (serialized samples keyed by their 'sampleGuid').
        When at least one sample was serialized it also contains 'individualsByGuid', mapping the
        guid of each individual referenced by those samples to {'sampleGuids': [...]} with the
        guids of every sample in that individual's sample_set.
    """
    updated_sample_json = get_json_for_samples(matched_sample_id_to_sample_record.values(), project_guid=project_guid)
    response = {
        'samplesByGuid': {s['sampleGuid']: s for s in updated_sample_json}
    }
    updated_individuals = {s['individualGuid'] for s in updated_sample_json}
    if updated_individuals:
        # Only the samples are read below, so there is no need to prefetch 'family' as well.
        individuals = Individual.objects.filter(guid__in=updated_individuals).prefetch_related('sample_set').only('guid')
        response['individualsByGuid'] = {
            # Use .all() so the prefetched samples are reused; chaining .only() on the related
            # manager builds a new queryset and would hit the database once per individual.
            ind.guid: {'sampleGuids': [s.guid for s in ind.sample_set.all()]} for ind in individuals
        }
    return response
Ejemplo n.º 2
0
def _get_samples_json(matched_sample_id_to_sample_record, inactivate_sample_guids, project_guid):
    """Build the JSON response for updated samples plus samples that were inactivated.

    Args:
        matched_sample_id_to_sample_record (dict): maps sample ids to sample records. Only the
            values are used; they are serialized with get_json_for_samples.
        inactivate_sample_guids (iterable): sample guids reported as {'isActive': False}.
        project_guid (string): passed through to get_json_for_samples.

    Returns:
        dict: 'samplesByGuid' holds the inactivated entries overlaid with the serialized samples
        (a serialized sample wins when a guid appears in both). 'individualsByGuid' is added only
        when at least one sample was serialized, and maps each referenced individual guid to
        {'sampleGuids': [...]} listing the guids in that individual's sample_set.
    """
    sample_records = matched_sample_id_to_sample_record.values()
    serialized_samples = get_json_for_samples(sample_records, project_guid=project_guid)

    # Inactivated guids go in first so that a freshly serialized sample overrides them.
    samples_by_guid = {}
    for guid in inactivate_sample_guids:
        samples_by_guid[guid] = {'isActive': False}
    for sample_json in serialized_samples:
        samples_by_guid[sample_json['sampleGuid']] = sample_json

    response = {'samplesByGuid': samples_by_guid}

    individual_guids = set()
    for sample_json in serialized_samples:
        individual_guids.add(sample_json['individualGuid'])
    if not individual_guids:
        return response

    individuals_by_guid = {}
    for individual in Individual.objects.filter(guid__in=individual_guids).prefetch_related('sample_set'):
        individuals_by_guid[individual.guid] = {
            'sampleGuids': [sample.guid for sample in individual.sample_set.all()],
        }
    response['individualsByGuid'] = individuals_by_guid
    return response
Ejemplo n.º 3
0
def _retrieve_samples(project_guid, individuals_by_guid, sample_models, sample_guid_key='sampleGuids'):
    """Serializes the given samples and registers each one on its individual.

        Args:
            project_guid (string): passed through to get_json_for_samples.
            individuals_by_guid (dict): maps each individual guid to a dictionary with individual
                info. It is modified in place: every sample's guid is added, via .add(), to the
                collection stored under sample_guid_key. That entry must already exist for each
                individual referenced by a sample (a missing key raises KeyError).
            sample_models: the samples to serialize with get_json_for_samples.
            sample_guid_key (string): key inside each individual dictionary that collects the
                sample guids.
        Returns:
            dict: serialized samples keyed by their 'sampleGuid'.
        """
    samples_by_guid = {}
    for sample_json in get_json_for_samples(sample_models, project_guid=project_guid):
        guid = sample_json['sampleGuid']
        samples_by_guid[guid] = sample_json
        individuals_by_guid[sample_json['individualGuid']][sample_guid_key].add(guid)

    return samples_by_guid
Ejemplo n.º 4
0
def add_dataset_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'ignoreMissingFamilyMembers': optional, passed through to add_dataset
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
            'mappingFilePath': optional, passed through to add_dataset
        }

        Response body - will contain the following structure:
        {
            'samplesByGuid': { <sampleGuid>: <sample json>, ... },
            'individualsByGuid': { <individualGuid>: { 'sampleGuids': [...] }, ... }
        }
        'individualsByGuid' is only present when at least one of the returned samples was newly
        created. If anything fails while loading the dataset or building the response, the body
        is { 'errors': [<message>] } with HTTP status 400.

    Raises:
        ValueError: if 'sampleType' or 'datasetType' is missing from the request body (this check
            runs before the error-to-JSON handling, so it propagates to the caller).
    """

    logger.info("add_dataset_handler: " + str(request))

    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    logger.info("add_dataset_handler: received %s" % pformat(request_json))

    required_fields = ['sampleType', 'datasetType']
    if any(field not in request_json for field in required_fields):
        raise ValueError("request must contain fields: {}".format(
            ', '.join(required_fields)))

    sample_type = request_json['sampleType']
    dataset_type = request_json['datasetType']
    elasticsearch_index = request_json.get('elasticsearchIndex')
    if elasticsearch_index:
        elasticsearch_index = elasticsearch_index.strip()
    dataset_path = request_json.get('datasetPath')
    if dataset_path:
        dataset_path = dataset_path.strip()
    dataset_name = request_json.get('datasetName')
    if dataset_name:
        dataset_name = dataset_name.strip()

    ignore_extra_samples_in_callset = request_json.get(
        'ignoreExtraSamplesInCallset')
    ignore_missing_family_members = request_json.get(
        'ignoreMissingFamilyMembers')
    # "or {}" also covers a body that sends "mappingFile": null explicitly.
    mapping_file_id = (request_json.get('mappingFile') or {}).get('uploadedFileId')
    mapping_file_path = request_json.get('mappingFilePath')

    try:
        updated_samples, created_sample_ids = add_dataset(
            project=project,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            dataset_path=dataset_path,
            dataset_name=dataset_name,
            max_edit_distance=0,
            ignore_extra_samples_in_callset=ignore_extra_samples_in_callset,
            ignore_missing_family_members=ignore_missing_family_members,
            mapping_file_path=mapping_file_path,
            mapping_file_id=mapping_file_id,
        )

        # update VCFFile records
        if updated_samples:
            if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS:
                base_project = BaseProject.objects.get(seqr_project=project)
                get_datastore(base_project).bust_project_cache(
                    base_project.project_id)
                clear_project_results_cache(base_project.project_id)

                # Reuse the most recently created matching VCFFile, if there is one.
                vcf_file = VCFFile.objects.filter(
                    project=base_project,
                    dataset_type=dataset_type,
                    sample_type=sample_type,
                    elasticsearch_index=elasticsearch_index).order_by(
                        '-pk').first()

                if not vcf_file:
                    vcf_file = VCFFile.objects.create(
                        project=base_project,
                        dataset_type=dataset_type,
                        sample_type=sample_type,
                        elasticsearch_index=elasticsearch_index,
                    )
                    logger.info("Created vcf file: " + str(vcf_file.__dict__))

                vcf_file.file_path = dataset_path or "{}.vcf.gz".format(
                    elasticsearch_index
                )  # legacy VCFFile model requires non-empty vcf path
                # next(iter(...)) works on both Python 2 and 3, unlike the iterator's .next() method.
                vcf_file.loaded_date = next(iter(updated_samples)).loaded_date
                vcf_file.save()

                for indiv in [s.individual for s in updated_samples]:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=indiv).only('id'):
                        base_indiv.vcf_files.add(vcf_file)

            elif dataset_type == Sample.DATASET_TYPE_READ_ALIGNMENTS:
                for sample in updated_samples:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=sample.individual).only('id'):
                        base_indiv.bam_file_path = sample.dataset_file_path
                        base_indiv.save()

        updated_sample_json = get_json_for_samples(updated_samples,
                                                   project_guid=project_guid)
        response = {
            'samplesByGuid': {s['sampleGuid']: s
                              for s in updated_sample_json}
        }
        # Only individuals that gained a newly created sample need their sample list refreshed.
        updated_individuals = {
            s['individualGuid']
            for s in updated_sample_json if s['sampleId'] in created_sample_ids
        }
        if updated_individuals:
            individuals = Individual.objects.filter(
                guid__in=updated_individuals).prefetch_related(
                    'sample_set', 'family').only('guid')
            response['individualsByGuid'] = {
                ind.guid: {
                    # .all() is answered from the prefetch cache; chaining .only() here would
                    # build a new queryset and query the database once per individual.
                    'sampleGuids': [s.guid for s in ind.sample_set.all()]
                }
                for ind in individuals
            }

            # Families that were waiting for data now have some, so move them forward.
            for ind in individuals:
                family = ind.family
                if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA:
                    update_seqr_model(family,
                                      analysis_status=Family.
                                      ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

        return create_json_response(response)
    except Exception as e:
        traceback.print_exc()
        # Not every exception has a .message attribute (none do on Python 3), so look it up
        # defensively instead of risking an AttributeError inside the error handler.
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]},
                                    status=400)
Ejemplo n.º 5
0
def _get_projects_details(projects, user, project_category_guid=None):
    """Assemble the by-guid JSON payload for the given projects.

    Args:
        projects: project models to describe; check_permissions is called on each with user.
        user: passed to the permission check and to the JSON serializers.
        project_category_guid (string): optional; when truthy, the matching ProjectCategory's
            json() is included in the response.

    Returns:
        dict with 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
        'locusListsByGuid' and 'analysisGroupsByGuid', plus 'projectCategoriesByGuid' when
        project_category_guid was given.
    """
    for project in projects:
        check_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {project.guid: project for project in projects}
    projects_json = get_json_for_projects(projects, user)

    locus_lists = set()

    functional_data_tag_types = get_json_for_variant_functional_data_tag_types()

    # Tag types owned by one of these projects, plus the ones not tied to any project.
    tag_type_models = VariantTagType.objects.filter(
        Q(project__in=projects) | Q(project__isnull=True)).prefetch_related('project')
    variant_tag_types_by_guid = {}
    for tag_type in tag_type_models:
        variant_tag_types_by_guid[tag_type.guid] = tag_type
    variant_tag_types = _get_json_for_models(variant_tag_types_by_guid.values())

    for project_json in projects_json:
        current_guid = project_json['projectGuid']
        project_locus_lists = get_project_locus_list_models(project_models_by_guid[current_guid])
        locus_lists.update(project_locus_lists)

        locus_list_guids = [locus_list.guid for locus_list in project_locus_lists]

        # Keep tag types without a project and those belonging to this project.
        project_tag_types = []
        for tag_type_json in variant_tag_types:
            owner = variant_tag_types_by_guid[tag_type_json['variantTagTypeGuid']].project
            if owner is None or owner.guid == current_guid:
                project_tag_types.append(tag_type_json)

        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': project_tag_types,
            'variantFunctionalTagTypes': functional_data_tag_types,
        })

    families = _get_json_for_families(Family.objects.filter(project__in=projects), user)
    individuals = _get_json_for_individuals(Individual.objects.filter(family__project__in=projects), user=user)
    samples = get_json_for_samples(Sample.objects.filter(individual__family__project__in=projects))
    analysis_groups = get_json_for_analysis_groups(AnalysisGroup.objects.filter(project__in=projects))

    # Attach child guids to their parents: individuals to families, samples to individuals.
    individual_guids_by_family = {}
    for individual in individuals:
        individual_guids_by_family.setdefault(individual['familyGuid'], []).append(individual['individualGuid'])
    for family in families:
        family['individualGuids'] = individual_guids_by_family.setdefault(family['familyGuid'], [])

    sample_guids_by_individual = {}
    for sample in samples:
        sample_guids_by_individual.setdefault(sample['individualGuid'], []).append(sample['sampleGuid'])
    for individual in individuals:
        individual['sampleGuids'] = sample_guids_by_individual.setdefault(individual['individualGuid'], [])

    locus_lists_json = get_json_for_locus_lists(list(locus_lists), user)

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'locusListsByGuid': {ll['locusListGuid']: ll for ll in locus_lists_json},
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }
    if project_category_guid:
        category = ProjectCategory.objects.get(guid=project_category_guid)
        response['projectCategoriesByGuid'] = {project_category_guid: category.json()}
    return response
Ejemplo n.º 6
0
def _get_projects_details(projects, user, project_category_guid=None):
    """Assemble the by-guid JSON payload for the given projects.

    Args:
        projects: project models to describe; check_project_permissions is called on each with
            user. Must support len() and indexing.
        user: passed to the permission check and to the JSON serializers.
        project_category_guid (string): optional; when truthy, the matching ProjectCategory's
            json() is included in the response.

    Returns:
        dict with 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
        'igvSamplesByGuid', 'locusListsByGuid' and 'analysisGroupsByGuid', plus
        'projectCategoriesByGuid' when project_category_guid was given.
    """
    for project in projects:
        check_project_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {project.guid: project for project in projects}
    projects_json = get_json_for_projects(projects, user)

    locus_lists = LocusList.objects.filter(projects__in=projects).prefetch_related('projects')

    # With exactly one project its guid is handed to the serializers; otherwise None is passed
    # and the parent guids are filled in from database ids further down.
    single_project_guid = None
    if len(projects) == 1:
        single_project_guid = projects[0].guid

    functional_data_tag_types = get_json_for_variant_functional_data_tag_types()

    # Tag types owned by one of these projects, plus the ones not tied to any project.
    tag_type_models = VariantTagType.objects.filter(
        Q(project__in=projects) | Q(project__isnull=True)).prefetch_related('project')
    variant_tag_types_by_guid = {}
    for tag_type in tag_type_models:
        variant_tag_types_by_guid[tag_type.guid] = tag_type
    variant_tag_types = _get_json_for_models(list(variant_tag_types_by_guid.values()))

    for project_json in projects_json:
        current_guid = project_json['projectGuid']
        project = project_models_by_guid[current_guid]

        locus_list_guids = []
        for locus_list in locus_lists:
            if project in locus_list.projects.all():
                locus_list_guids.append(locus_list.guid)

        # Keep tag types without a project and those belonging to this project.
        project_tag_types = []
        for tag_type_json in variant_tag_types:
            owner = variant_tag_types_by_guid[tag_type_json['variantTagTypeGuid']].project
            if owner is None or owner.guid == current_guid:
                project_tag_types.append(tag_type_json)

        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': project_tag_types,
            'variantFunctionalTagTypes': functional_data_tag_types,
        })

    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(
        family_models, user, project_guid=single_project_guid, skip_nested=True)

    individual_models = Individual.objects.filter(family__in=family_models)
    individuals = _get_json_for_individuals(
        individual_models, user=user, project_guid=single_project_guid, add_hpo_details=True, skip_nested=True)

    sample_models = Sample.objects.filter(individual__in=individual_models)
    samples = get_json_for_samples(sample_models, project_guid=single_project_guid, skip_nested=True)

    igv_sample_models = IgvSample.objects.filter(individual__in=individual_models)
    igv_samples = get_json_for_samples(igv_sample_models, project_guid=single_project_guid, skip_nested=True)

    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups = get_json_for_analysis_groups(
        analysis_group_models, project_guid=single_project_guid, skip_nested=True)

    if not single_project_guid:
        # The serialized rows carry parent ids here; swap each id for the corresponding guid(s).
        project_id_to_guid = {project.id: project.guid for project in projects}
        family_id_to_guid = {family.id: family.guid for family in family_models}
        individual_id_to_guid = {individual.id: individual.guid for individual in individual_models}
        family_guid_to_project_guid = {}
        individual_guid_to_project_guid = {}

        for family in families:
            owning_project_guid = project_id_to_guid[family.pop('projectId')]
            family['projectGuid'] = owning_project_guid
            family_guid_to_project_guid[family['familyGuid']] = owning_project_guid

        for individual in individuals:
            family_guid = family_id_to_guid[individual.pop('familyId')]
            owning_project_guid = family_guid_to_project_guid[family_guid]
            individual['familyGuid'] = family_guid
            individual['projectGuid'] = owning_project_guid
            individual_guid_to_project_guid[individual['individualGuid']] = owning_project_guid

        # Regular and IGV samples are resolved the same way.
        for sample_rows in (samples, igv_samples):
            for sample in sample_rows:
                individual_guid = individual_id_to_guid[sample.pop('individualId')]
                sample['individualGuid'] = individual_guid
                sample['projectGuid'] = individual_guid_to_project_guid[individual_guid]

        for group in analysis_groups:
            group['projectGuid'] = project_id_to_guid[group.pop('projectId')]

    # Attach child guids to their parents: individuals to families, samples to individuals.
    individual_guids_by_family = {}
    for individual in individuals:
        individual_guids_by_family.setdefault(individual['familyGuid'], []).append(individual['individualGuid'])
    for family in families:
        family['individualGuids'] = individual_guids_by_family.setdefault(family['familyGuid'], [])

    sample_guids_by_individual = {}
    for sample in samples:
        sample_guids_by_individual.setdefault(sample['individualGuid'], []).append(sample['sampleGuid'])
    igv_sample_guids_by_individual = {}
    for sample in igv_samples:
        igv_sample_guids_by_individual.setdefault(sample['individualGuid'], []).append(sample['sampleGuid'])
    for individual in individuals:
        guid = individual['individualGuid']
        individual['sampleGuids'] = sample_guids_by_individual.setdefault(guid, [])
        individual['igvSampleGuids'] = igv_sample_guids_by_individual.setdefault(guid, [])

    locus_lists_json = get_json_for_locus_lists(locus_lists, user)

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'igvSamplesByGuid': {s['sampleGuid']: s for s in igv_samples},
        'locusListsByGuid': {ll['locusListGuid']: ll for ll in locus_lists_json},
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }
    if project_category_guid:
        category = ProjectCategory.objects.get(guid=project_category_guid)
        response['projectCategoriesByGuid'] = {project_category_guid: category.json()}
    return response
Ejemplo n.º 7
0
def _get_projects_details(projects, user, project_category_guid=None):
    """Assemble the by-guid JSON payload for the given projects.

    Args:
        projects: project models to describe; check_permissions is called on each with user.
        user: passed to the permission check and to the JSON serializers.
        project_category_guid (string): optional; when truthy, the matching ProjectCategory's
            json() is included in the response.

    Returns:
        dict with 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
        'locusListsByGuid' and 'analysisGroupsByGuid', plus 'projectCategoriesByGuid' when
        project_category_guid was given.
    """
    for project in projects:
        check_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {project.guid: project for project in projects}
    projects_json = get_json_for_projects(projects, user)

    # Union of the locus lists of every project, serialized once at the end.
    locus_lists = set()
    functional_data_tag_types = get_json_for_variant_functional_data_tag_types()
    for project_json in projects_json:
        project = project_models_by_guid[project_json['projectGuid']]
        project_locus_lists = get_project_locus_list_models(project)
        locus_lists.update(project_locus_lists)

        locus_list_guids = [locus_list.guid for locus_list in project_locus_lists]
        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': get_project_variant_tag_types(project),
            'variantFunctionalTagTypes': functional_data_tag_types,
        })

    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(family_models, user, add_individual_guids_field=True)

    individual_models = Individual.objects.filter(family__project__in=projects)
    individuals = _get_json_for_individuals(individual_models, user=user, add_sample_guids_field=True)

    samples = get_json_for_samples(Sample.objects.filter(individual__family__project__in=projects))
    analysis_groups = get_json_for_analysis_groups(AnalysisGroup.objects.filter(project__in=projects))

    locus_lists_json = get_json_for_locus_lists(list(locus_lists), user)

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'locusListsByGuid': {ll['locusListGuid']: ll for ll in locus_lists_json},
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }
    if project_category_guid:
        category = ProjectCategory.objects.get(guid=project_category_guid)
        response['projectCategoriesByGuid'] = {project_category_guid: category.json()}
    return response
Ejemplo n.º 8
0
def add_dataset_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
        }

        Response body - will contain the following structure:
        {
            'samplesByGuid': { <sampleGuid>: <sample json>, ... },
            'individualsByGuid': { <individualGuid>: { 'sampleGuids': [...] }, ... }
        }
        'individualsByGuid' is only present when at least one of the returned samples was newly
        created. If anything fails while loading the dataset or building the response, the body
        is { 'errors': [<message>] } with HTTP status 400.

    Raises:
        ValueError: if 'sampleType' or 'datasetType' is missing from the request body (this check
            runs before the error-to-JSON handling, so it propagates to the caller).
    """

    logger.info("add_dataset_handler: " + str(request))

    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    logger.info("add_dataset_handler: received %s" % pformat(request_json))

    required_fields = ['sampleType', 'datasetType']
    if any(field not in request_json for field in required_fields):
        raise ValueError("request must contain fields: {}".format(
            ', '.join(required_fields)))

    sample_type = request_json['sampleType']
    dataset_type = request_json['datasetType']
    elasticsearch_index = request_json.get('elasticsearchIndex')
    dataset_path = request_json.get('datasetPath')
    dataset_name = request_json.get('datasetName')

    ignore_extra_samples_in_callset = request_json.get(
        'ignoreExtraSamplesInCallset')
    # "or {}" also covers a body that sends "mappingFile": null explicitly.
    mapping_file_id = (request_json.get('mappingFile') or {}).get('uploadedFileId')

    try:
        updated_samples, created_sample_ids = add_dataset(
            project=project,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            dataset_path=dataset_path,
            dataset_name=dataset_name,
            max_edit_distance=0,
            ignore_extra_samples_in_callset=ignore_extra_samples_in_callset,
            mapping_file_id=mapping_file_id,
        )
        updated_sample_json = get_json_for_samples(updated_samples,
                                                   project_guid=project_guid)
        response = {
            'samplesByGuid': {s['sampleGuid']: s
                              for s in updated_sample_json}
        }
        # Only individuals that gained a newly created sample need their sample list refreshed.
        updated_individuals = {
            s['individualGuid']
            for s in updated_sample_json if s['sampleId'] in created_sample_ids
        }
        if updated_individuals:
            individuals = Individual.objects.filter(
                guid__in=updated_individuals).prefetch_related(
                    'sample_set').only('guid')
            response['individualsByGuid'] = {
                ind.guid: {
                    # .all() is answered from the prefetch cache; chaining .only() here would
                    # build a new queryset and query the database once per individual.
                    'sampleGuids': [s.guid for s in ind.sample_set.all()]
                }
                for ind in individuals
            }
        return create_json_response(response)
    except Exception as e:
        # Not every exception has a .message attribute (none do on Python 3), so look it up
        # defensively instead of risking an AttributeError inside the error handler.
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]},
                                    status=400)