Ejemplo n.º 1
0
def _retrieve_individuals(project_guid, user):
    """Load the JSON for every individual in a project, keyed by individual guid.

    Args:
        project_guid (string): guid of the project whose individuals are fetched
        user (object): passed through to _get_json_for_individuals
    Returns:
        dictionary: individuals_by_guid - each value is the individual's JSON dict
            with an additional, initially empty, 'sampleGuids' set
    """
    # Restrict the query to the columns used for serialization, plus the two
    # related-model columns pulled in through select_related.
    related_fields = ['family__guid', 'case_review_status_last_modified_by__email']
    fields = Individual._meta.json_fields + Individual._meta.internal_json_fields + related_fields

    queryset = Individual.objects.filter(family__project__guid=project_guid)
    queryset = queryset.select_related('family', 'case_review_status_last_modified_by')
    queryset = queryset.only(*fields)

    individuals_json = _get_json_for_individuals(queryset, user=user, project_guid=project_guid)

    individuals_by_guid = {}
    for individual_json in individuals_json:
        individual_json['sampleGuids'] = set()
        individuals_by_guid[individual_json['individualGuid']] = individual_json

    return individuals_by_guid
Ejemplo n.º 2
0
def anvil_export(request, project_guid):
    """Build the AnVIL export rows for one project, or for all projects in the 'anvil' category.

    Each row is the JSON for one individual, extended with its project name, the number
    of saved variants in its family and, for every saved variant the individual carries
    (numAlt > 0), a set of numbered columns ('Zygosity-N', 'Chrom-N', ..., 'Gene-N').

    Args:
        request (object): Django request object; the optional 'loadedBefore' GET parameter
            is forwarded to _get_loaded_before_date_project_individuals
        project_guid (string): project GUID, or 'all' to export every project whose
            category name matches 'anvil' (case-insensitive)
    Returns:
        The response created by create_json_response for {'anvilRows': rows}
    """
    if project_guid == 'all':
        project_guid = None

    if project_guid:
        projects_by_guid = {project_guid: Project.objects.get(guid=project_guid)}
    else:
        projects_by_guid = {p.guid: p for p in Project.objects.filter(projectcategory__name__iexact='anvil')}

    individuals = _get_loaded_before_date_project_individuals(projects_by_guid.values(), loaded_before=request.GET.get('loadedBefore'))

    saved_variants_by_family = _get_saved_variants_by_family(projects_by_guid.values(), request.user)

    # Handle compound het genes
    compound_het_gene_id_by_family = {}
    for family_guid, saved_variants in saved_variants_by_family.items():
        if len(saved_variants) > 1:
            # Only variants where no genotype is homozygous-alt can take part in a compound het
            potential_compound_het_variants = [
                variant for variant in saved_variants if all(gen['numAlt'] < 2 for gen in variant['genotypes'].values())
            ]
            main_gene_ids = {variant['mainTranscript']['geneId'] for variant in potential_compound_het_variants}
            if len(main_gene_ids) > 1:
                # This occurs in compound hets where some hits have a primary transcripts in different genes
                # NOTE: if several genes are shared by all candidate variants, the last one iterated wins
                for gene_id in main_gene_ids:
                    if all(gene_id in variant['transcripts'] for variant in potential_compound_het_variants):
                        compound_het_gene_id_by_family[family_guid] = gene_id

    rows = _get_json_for_individuals(list(individuals), project_guid=project_guid, family_fields=['family_id', 'coded_phenotype'])

    gene_ids = set()
    for row in rows:
        row['Project_ID'] = projects_by_guid[row['projectGuid']].name

        saved_variants = saved_variants_by_family[row['familyGuid']]
        row['numSavedVariants'] = len(saved_variants)
        for i, variant in enumerate(saved_variants):
            genotype = variant['genotypes'].get(row['individualGuid'], {})
            # Individuals without a genotype for this variant default to -1 and are skipped
            if genotype.get('numAlt', -1) > 0:
                gene_id = compound_het_gene_id_by_family.get(row['familyGuid']) or variant['mainTranscript']['geneId']
                gene_ids.add(gene_id)
                variant_fields = {
                    'Zygosity': 'heterozygous' if genotype['numAlt'] == 1 else 'homozygous',
                    'Chrom': variant['chrom'],
                    'Pos': variant['pos'],
                    'Ref': variant['ref'],
                    'Alt': variant['alt'],
                    'hgvsc': variant['mainTranscript']['hgvsc'],
                    'hgvsp': variant['mainTranscript']['hgvsp'],
                    'Transcript': variant['mainTranscript']['transcriptId'],
                    'geneId': gene_id,
                }
                # Column suffix is the 1-based position of the variant in the family's saved variants
                row.update({'{}-{}'.format(k, i + 1): v for k, v in variant_fields.items()})

    genes_by_id = get_genes(gene_ids)
    for row in rows:
        # Iterate over a snapshot of the items: the loop adds 'Gene-N' keys to the row, and
        # growing a dict while iterating its live items() view raises
        # "RuntimeError: dictionary changed size during iteration" on Python 3.
        for key, gene_id in list(row.items()):
            if key.startswith('geneId') and genes_by_id.get(gene_id):
                row[key.replace('geneId', 'Gene')] = genes_by_id[gene_id]['geneSymbol']

    return create_json_response({'anvilRows': rows})
Ejemplo n.º 3
0
def save_individuals_metadata_table_handler(request, project_guid,
                                            upload_file_id):
    """
    Handler for 'save' requests that applies an individuals metadata table previously
    uploaded through receive_individuals_metadata_handler.

    Every uploaded record is matched to an individual of the project by its guid column,
    and the record's values for the INDIVIDUAL_METADATA_FIELDS keys it contains are
    written to that individual. The response holds the updated individuals keyed by guid.
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    json_records, _ = load_uploaded_file(upload_file_id)

    record_guids = [rec[INDIVIDUAL_GUID_COL] for rec in json_records]
    project_individuals = Individual.objects.filter(family__project=project,
                                                    guid__in=record_guids)
    individuals_by_guid = {}
    for model in project_individuals:
        individuals_by_guid[model.guid] = model

    for rec in json_records:
        target = individuals_by_guid[rec[INDIVIDUAL_GUID_COL]]
        # Only forward the metadata fields that are actually present in this record
        metadata_update = {}
        for field in INDIVIDUAL_METADATA_FIELDS.keys():
            if field in rec:
                metadata_update[field] = rec[field]
        update_model_from_json(target, metadata_update, user=request.user)

    individuals_json = _get_json_for_individuals(
        list(individuals_by_guid.values()),
        user=request.user,
        add_hpo_details=True,
    )
    response = {
        'individualsByGuid': {
            individual_json['individualGuid']: individual_json
            for individual_json in individuals_json
        },
    }
    return create_json_response(response)
Ejemplo n.º 4
0
def _retrieve_individuals(project_guid, user):
    """Load the JSON (including HPO details) for every individual in a project.

    Args:
        project_guid (string): guid of the project whose individuals are fetched
        user (object): passed through to _get_json_for_individuals
    Returns:
        tuple: (individuals_by_guid, individual_models) - the dictionary maps each
            individualGuid to its JSON dict, pre-filled with empty 'sampleGuids' and
            'igvSampleGuids' sets and a None 'mmeSubmissionGuid'; individual_models is
            the Individual queryset the JSON was built from
    """
    individual_models = Individual.objects.filter(family__project__guid=project_guid)

    individuals_json = _get_json_for_individuals(
        individual_models, user=user, project_guid=project_guid, add_hpo_details=True)

    individuals_by_guid = {}
    for individual_json in individuals_json:
        # Placeholder values for related data that this function does not load itself
        individual_json.update({
            'sampleGuids': set(),
            'igvSampleGuids': set(),
            'mmeSubmissionGuid': None,
        })
        individuals_by_guid[individual_json['individualGuid']] = individual_json

    return individuals_by_guid, individual_models
Ejemplo n.º 5
0
def saved_variants(request, tag):
    """Return all saved variants carrying the given project-independent tag.

    The response also bundles the genes, projects, families, individuals and locus
    lists related to those variants, each keyed by guid / id.

    Args:
        request (object): Django request object
        tag (string): name of a VariantTagType that has no project
    """
    tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
    variant_models = SavedVariant.objects.filter(varianttag__variant_tag_type=tag_type, family__isnull=False)
    variants_json = get_json_for_saved_variants(variant_models, add_tags=True, add_details=True, user=request.user)

    # Collect the distinct projects and families the variants belong to
    project_models_by_guid = {}
    families = set()
    for variant_model in variant_models:
        project_models_by_guid[variant_model.project.guid] = variant_model.project
        families.add(variant_model.family)
    individuals = Individual.objects.filter(family__in=families)

    genes = _saved_variant_genes(variants_json)
    locus_list_guids = _add_locus_lists(project_models_by_guid.values(), variants_json, genes)

    projects_json = get_json_for_projects(project_models_by_guid.values(), user=request.user, add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    for project_json in projects_json:
        project_model = project_models_by_guid[project_json['projectGuid']]
        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': get_project_variant_tag_types(project_model),
            'variantFunctionalTagTypes': functional_tag_types,
        })

    families_json = _get_json_for_families(list(families), user=request.user, add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individuals, user=request.user)

    locus_list_models = LocusList.objects.filter(guid__in=locus_list_guids)
    locus_lists_by_guid = {}
    for locus_list_json in get_json_for_locus_lists(locus_list_models, request.user):
        locus_lists_by_guid[locus_list_json['locusListGuid']] = locus_list_json

    response = {
        'savedVariantsByGuid': {v['variantGuid']: v for v in variants_json},
        'genesById': genes,
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families_json},
        'individualsByGuid': {i['individualGuid']: i for i in individuals_json},
        'locusListsByGuid': locus_lists_by_guid,
    }
    return create_json_response(response)
Ejemplo n.º 6
0
def save_individuals_table_handler(request, project_guid, upload_file_id):
    """Handler for 'save' requests to apply Individual tables previously uploaded through receive_individuals_table(..)

    The uploaded records are passed to add_or_update_individuals_and_families, and the
    JSON for the resulting families and individuals is returned keyed by guid.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
        upload_file_id (string): a token sent to the client by receive_individuals_table(..)
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    json_records = load_uploaded_file(upload_file_id)

    updated_families, updated_individuals = add_or_update_individuals_and_families(
        project, individual_records=json_records, user=request.user
    )

    # Serialize the updated individuals, then the updated families
    individuals_by_guid = {}
    for individual_json in _get_json_for_individuals(updated_individuals, request.user, add_sample_guids_field=True):
        individuals_by_guid[individual_json['individualGuid']] = individual_json

    families_by_guid = {}
    for family_json in _get_json_for_families(updated_families, request.user, add_individual_guids_field=True):
        families_by_guid[family_json['familyGuid']] = family_json

    return create_json_response({
        'individualsByGuid': individuals_by_guid,
        'familiesByGuid': families_by_guid,
    })
Ejemplo n.º 7
0
def save_hpo_table_handler(request, project_guid, upload_file_id):
    """
    Handler for 'save' requests to apply HPO terms tables previously uploaded through receive_hpo_table_handler

    For each uploaded record, the matching individual's present and absent feature lists
    are replaced by the record's HPO term lists and the individual is saved. The response
    holds the JSON (with HPO details) for the project's matched individuals, keyed by guid.
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    json_records, _ = load_uploaded_file(upload_file_id)

    record_guids = [rec[INDIVIDUAL_GUID_COLUMN] for rec in json_records]
    individuals_by_guid = {}
    for model in Individual.objects.filter(family__project=project, guid__in=record_guids):
        individuals_by_guid[model.guid] = model

    def _as_features(hpo_terms):
        # Each term is stored as a one-key dict holding its id
        return [{'id': term} for term in hpo_terms]

    for rec in json_records:
        target = individuals_by_guid[rec[INDIVIDUAL_GUID_COLUMN]]
        target.features = _as_features(rec[HPO_TERMS_PRESENT_COLUMN])
        target.absent_features = _as_features(rec[HPO_TERMS_ABSENT_COLUMN])
        target.save()

    individuals_json = _get_json_for_individuals(
        individuals_by_guid.values(), user=request.user, add_hpo_details=True,
    )
    response = {
        'individualsByGuid': {
            individual_json['individualGuid']: individual_json for individual_json in individuals_json
        },
    }
    return create_json_response(response)
Ejemplo n.º 8
0
def saved_variants_page(request, tag):
    """Return the saved variants for a tag across all projects the user can view.

    Args:
        request (object): Django request object; the optional 'gene' GET parameter limits
            the variants to those whose saved JSON has that key under 'transcripts'
        tag (string): name of a project-independent VariantTagType, or 'ALL' for every
            saved variant that has at least one tag

    Without a gene filter, a 400 response is returned when more than MAX_SAVED_VARIANTS
    variants match. Otherwise the response built by get_json_for_saved_variants_with_tags
    is extended with the related genes, projects, families, individuals and locus lists.
    """
    gene = request.GET.get('gene')

    if tag == 'ALL':
        variant_models = SavedVariant.objects.exclude(varianttag=None)
    else:
        global_tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
        variant_models = SavedVariant.objects.filter(varianttag__variant_tag_type=global_tag_type)

    viewable_projects = get_projects_user_can_view(request.user)
    variant_models = variant_models.filter(family__project__in=viewable_projects)

    if gene:
        variant_models = variant_models.filter(saved_variant_json__transcripts__has_key=gene)
    elif variant_models.count() > MAX_SAVED_VARIANTS:
        return create_json_response({'error': 'Select a gene to filter variants'}, status=400)

    prefetch_related_objects(variant_models, 'family__project')
    response_json = get_json_for_saved_variants_with_tags(variant_models, add_details=True, include_missing_variants=True)

    # Collect the distinct projects and families the variants belong to
    project_models_by_guid = {}
    families = set()
    for variant_model in variant_models:
        variant_project = variant_model.family.project
        project_models_by_guid[variant_project.guid] = variant_project
        families.add(variant_model.family)
    individuals = Individual.objects.filter(family__in=families)

    variants_json = list(response_json['savedVariantsByGuid'].values())
    genes = saved_variant_genes(variants_json)
    locus_lists_by_guid = _add_locus_lists(list(project_models_by_guid.values()), genes, include_all_lists=True)

    projects_json = get_json_for_projects(list(project_models_by_guid.values()), user=request.user, add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    in_projects_or_global = Q(project__in=project_models_by_guid.values()) | Q(project__isnull=True)
    variant_tag_types = VariantTagType.objects.filter(in_projects_or_global)
    prefetch_related_objects(variant_tag_types, 'project')
    variant_tags_json = _get_json_for_models(variant_tag_types)
    # Only project-specific tag types get an entry; global ones are absent from this map
    tag_projects = {}
    for tag_type_model in variant_tag_types:
        if tag_type_model.project:
            tag_projects[tag_type_model.guid] = tag_type_model.project.guid

    for project_json in projects_json:
        project_guid = project_json['projectGuid']
        project_tag_types = []
        for tag_json in variant_tags_json:
            # Global tag types fall back to the current project's guid, so they always match
            owner_guid = tag_projects.get(tag_json['variantTagTypeGuid'], project_guid)
            if owner_guid == project_guid:
                project_tag_types.append(tag_json)
        project_tag_types.sort(key=lambda tag_json: tag_json['order'] or 0)

        project_json['locusListGuids'] = list(locus_lists_by_guid.keys())
        project_json['variantTagTypes'] = project_tag_types
        project_json['variantFunctionalTagTypes'] = functional_tag_types

    families_json = _get_json_for_families(list(families), user=request.user, add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individuals, add_hpo_details=True, user=request.user)

    locus_list_models = LocusList.objects.filter(guid__in=locus_lists_by_guid.keys())
    for locus_list_json in get_json_for_locus_lists(locus_list_models, request.user):
        locus_lists_by_guid[locus_list_json['locusListGuid']].update(locus_list_json)

    related_json = {
        'genesById': genes,
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families_json},
        'individualsByGuid': {i['individualGuid']: i for i in individuals_json},
        'locusListsByGuid': locus_lists_by_guid,
    }
    response_json.update(related_json)
    return create_json_response(response_json)
Ejemplo n.º 9
0
def _get_parsed_individuals(family, project_guid=None):
    """Build FAM-file style records for the individuals of the given family.

    If the family has fewer than 2 individuals, its pedigree_image is cleared and saved
    and None is returned. Otherwise, a placeholder parent is added for every parent that
    is missing or not part of the family, and one record per individual is returned.

    Args:
         family (object): seqr Family model.
         project_guid (string): passed through to _get_json_for_individuals.
    Returns:
        list: dicts with 'individualId', 'paternalId', 'maternalId', 'sex' and 'affected'
            keys, sorted by individual id, or None for families with under 2 individuals.
    """
    individuals = Individual.objects.filter(family=family)

    if len(individuals) < 2:
        family.pedigree_image = None
        family.save()
        return None

    # convert individuals to json, keyed by individual id
    individual_records = {}
    for individual_json in _get_json_for_individuals(
            individuals, project_guid=project_guid, family_guid=family.guid):
        individual_records[individual_json['individualId']] = individual_json

    # group the children by their (father, mother) pair, skipping individuals with no parents
    children_by_parent_ids = collections.defaultdict(list)
    for record in individual_records.values():
        father_id, mother_id = record['paternalId'], record['maternalId']
        if father_id or mother_id:
            children_by_parent_ids[(father_id, mother_id)].append(record)

    # generate placeholder individuals as needed, since HaploPainter1.043.pl doesn't support families with only 1 parent
    for (father_id, mother_id), children in children_by_parent_ids.items():
        parent_slots = (('paternalId', father_id, 'M'), ('maternalId', mother_id, 'F'))
        for parent_key, parent_id, sex in parent_slots:
            if parent_id and parent_id in individual_records:
                continue  # this parent is a real member of the family

            placeholder_id = 'placeholder_%s' % _random_string(10)
            for child in children:
                child[parent_key] = placeholder_id

            individual_records[placeholder_id] = {
                'individualId': placeholder_id,  # fake indiv id
                'paternalId': '',
                'maternalId': '',
                'sex': sex,
                # use a special value to tell HaploPainter to draw this individual as '?'
                'affected': 'INVISIBLE',
            }

    # convert to FAM file values
    fam_sex_values = {"M": "1", "F": "2", "U": "0"}
    # HaploPainter1.043.pl has been modified to hide individuals with affected-status='9'
    fam_affected_values = {"A": "2", "N": "1", "U": "0", "INVISIBLE": "9"}

    fam_records = []
    for individual_id in sorted(individual_records):
        record = individual_records[individual_id]
        fam_records.append({
            'individualId': individual_id,
            'paternalId': record['paternalId'] or '0',
            'maternalId': record['maternalId'] or '0',
            'sex': fam_sex_values[record['sex']],
            'affected': fam_affected_values[record['affected']],
        })
    return fam_records
Ejemplo n.º 10
0
def saved_variants_page(request, tag):
    """Return the saved variants carrying the given project-independent tag.

    Args:
        request (object): Django request object; the optional 'gene' GET parameter limits
            the variants to those whose saved JSON has that key under 'transcripts'
        tag (string): name of a VariantTagType that has no project

    Without a gene filter, a 400 response is returned when more than 10000 variants
    match. Otherwise the response holds the variants plus the related genes, projects,
    families, individuals and locus lists, each keyed by guid / id.
    """
    gene = request.GET.get('gene')
    tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
    saved_variant_models = SavedVariant.objects.filter(
        varianttag__variant_tag_type=tag_type)
    if gene:
        saved_variant_models = saved_variant_models.filter(
            saved_variant_json__transcripts__has_key=gene)
    elif saved_variant_models.count() > 10000:
        # The size limit only applies to unfiltered requests, so the COUNT query is
        # skipped entirely when a gene filter is present.
        return create_json_response(
            {'message': 'Select a gene to filter variants'}, status=400)

    prefetch_related_objects(saved_variant_models, 'family__project')
    saved_variants = get_json_for_saved_variants(saved_variant_models,
                                                 add_tags=True,
                                                 add_details=True)

    # Distinct projects and families the variants belong to
    project_models_by_guid = {
        variant.family.project.guid: variant.family.project
        for variant in saved_variant_models
    }
    families = {variant.family for variant in saved_variant_models}
    individuals = Individual.objects.filter(family__in=families)

    genes = _saved_variant_genes(saved_variants)
    locus_list_guids = _add_locus_lists(project_models_by_guid.values(),
                                        saved_variants, genes)

    projects_json = get_json_for_projects(
        project_models_by_guid.values(),
        user=request.user,
        add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    variant_tag_types = VariantTagType.objects.filter(
        Q(project__in=project_models_by_guid.values())
        | Q(project__isnull=True))
    prefetch_related_objects(variant_tag_types, 'project')
    variant_tags_json = _get_json_for_models(variant_tag_types)
    # Only project-specific tag types get an entry; global ones are absent from this map
    tag_projects = {
        vt.guid: vt.project.guid
        for vt in variant_tag_types if vt.project
    }

    for project_json in projects_json:
        project_guid = project_json['projectGuid']
        # Global tag types fall back to the current project's guid, so they always match
        project_variant_tags = [
            vt for vt in variant_tags_json if tag_projects.get(
                vt['variantTagTypeGuid'], project_guid) == project_guid
        ]
        project_json.update({
            'locusListGuids':
            locus_list_guids,
            # A missing (None) order is treated as 0: comparing None with a number
            # raises TypeError when sorting on Python 3.
            'variantTagTypes':
            sorted(project_variant_tags,
                   key=lambda variant_tag_type: variant_tag_type['order'] or 0),
            'variantFunctionalTagTypes':
            functional_tag_types,
        })

    families_json = _get_json_for_families(list(families),
                                           user=request.user,
                                           add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individuals,
                                                 user=request.user)
    locus_lists_by_guid = {
        locus_list['locusListGuid']: locus_list
        for locus_list in get_json_for_locus_lists(
            LocusList.objects.filter(guid__in=locus_list_guids), request.user)
    }

    return create_json_response({
        'savedVariantsByGuid':
        {variant['variantGuid']: variant
         for variant in saved_variants},
        'genesById': genes,
        'projectsByGuid':
        {project['projectGuid']: project
         for project in projects_json},
        'familiesByGuid':
        {family['familyGuid']: family
         for family in families_json},
        'individualsByGuid':
        {indiv['individualGuid']: indiv
         for indiv in individuals_json},
        'locusListsByGuid': locus_lists_by_guid,
    })
Ejemplo n.º 11
0
def _get_projects_details(projects, user, project_category_guid=None):
    """Collect the JSON for the given projects and everything attached to them.

    Args:
        projects (list): Project models; check_project_permissions is called for each one
        user (object): user passed to the permission check and to the JSON helpers
        project_category_guid (string): optional; when given, the JSON of that
            ProjectCategory is added under 'projectCategoriesByGuid'
    Returns:
        dictionary: 'projectsByGuid', 'familiesByGuid', 'individualsByGuid',
            'samplesByGuid', 'igvSamplesByGuid', 'locusListsByGuid' and
            'analysisGroupsByGuid' maps (plus 'projectCategoriesByGuid' when requested)
    """
    for project in projects:
        check_project_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {project.guid: project for project in projects}
    projects_json = get_json_for_projects(projects, user)

    locus_lists = LocusList.objects.filter(
        projects__in=projects).prefetch_related('projects')

    # Only set when exactly one project is requested; with several projects it stays None
    # and the guid fields are filled in by the `if not project_guid` block further down.
    project_guid = projects[0].guid if len(projects) == 1 else None

    functional_data_tag_types = get_json_for_variant_functional_data_tag_types(
    )
    # Tag types that belong to one of the projects, or to no project at all
    variant_tag_types_by_guid = {
        vtt.guid: vtt
        for vtt in VariantTagType.objects.filter(
            Q(project__in=projects)
            | Q(project__isnull=True)).prefetch_related('project')
    }
    variant_tag_types = _get_json_for_models(
        list(variant_tag_types_by_guid.values()))
    for project_json in projects_json:
        project = project_models_by_guid[project_json['projectGuid']]

        project_json.update({
            'locusListGuids': [
                locus_list.guid for locus_list in locus_lists
                if project in locus_list.projects.all()
            ],
            # Keep the tag types without a project plus the ones of this project
            'variantTagTypes': [
                vtt for vtt in variant_tag_types
                if variant_tag_types_by_guid[vtt['variantTagTypeGuid']].project
                is None or variant_tag_types_by_guid[vtt['variantTagTypeGuid']]
                .project.guid == project_json['projectGuid']
            ],
            'variantFunctionalTagTypes':
            functional_data_tag_types,
        })

    # NOTE(review): skip_nested presumably makes the helpers emit raw parent ids
    # ('projectId' / 'familyId' / 'individualId') when no project_guid is given - the
    # multi-project block below relies on those keys being present; confirm in the helpers.
    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(family_models,
                                      user,
                                      project_guid=project_guid,
                                      skip_nested=True)

    individual_models = Individual.objects.filter(family__in=family_models)
    individuals = _get_json_for_individuals(individual_models,
                                            user=user,
                                            project_guid=project_guid,
                                            add_hpo_details=True,
                                            skip_nested=True)

    sample_models = Sample.objects.filter(individual__in=individual_models)
    samples = get_json_for_samples(sample_models,
                                   project_guid=project_guid,
                                   skip_nested=True)

    igv_sample_models = IgvSample.objects.filter(
        individual__in=individual_models)
    igv_samples = get_json_for_samples(igv_sample_models,
                                       project_guid=project_guid,
                                       skip_nested=True)

    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups = get_json_for_analysis_groups(analysis_group_models,
                                                   project_guid=project_guid,
                                                   skip_nested=True)

    if not project_guid:
        # Several (or zero) projects: replace the database ids in each JSON dict with the
        # matching guids, resolving them top-down (project -> family -> individual -> sample).
        project_id_to_guid = {project.id: project.guid for project in projects}
        family_id_to_guid = {
            family.id: family.guid
            for family in family_models
        }
        individual_id_to_guid = {
            individual.id: individual.guid
            for individual in individual_models
        }
        family_guid_to_project_guid = {}
        individual_guid_to_project_guid = {}
        # The loops below rebind the local `project_guid`; it is not read again after
        # this block.
        for family in families:
            project_guid = project_id_to_guid[family.pop('projectId')]
            family['projectGuid'] = project_guid
            family_guid_to_project_guid[family['familyGuid']] = project_guid
        for individual in individuals:
            family_guid = family_id_to_guid[individual.pop('familyId')]
            project_guid = family_guid_to_project_guid[family_guid]
            individual['familyGuid'] = family_guid
            individual['projectGuid'] = project_guid
            individual_guid_to_project_guid[
                individual['individualGuid']] = project_guid
        for sample in samples:
            individual_guid = individual_id_to_guid[sample.pop('individualId')]
            sample['individualGuid'] = individual_guid
            sample['projectGuid'] = individual_guid_to_project_guid[
                individual_guid]
        for sample in igv_samples:
            individual_guid = individual_id_to_guid[sample.pop('individualId')]
            sample['individualGuid'] = individual_guid
            sample['projectGuid'] = individual_guid_to_project_guid[
                individual_guid]
        for group in analysis_groups:
            group['projectGuid'] = project_id_to_guid[group.pop('projectId')]

    # Attach the guids of each family's individuals to the family JSON
    individual_guids_by_family = defaultdict(list)
    for individual in individuals:
        individual_guids_by_family[individual['familyGuid']].append(
            individual['individualGuid'])
    for family in families:
        family['individualGuids'] = individual_guids_by_family[
            family['familyGuid']]

    # Attach the guids of each individual's samples and IGV samples to the individual JSON
    sample_guids_by_individual = defaultdict(list)
    for sample in samples:
        sample_guids_by_individual[sample['individualGuid']].append(
            sample['sampleGuid'])
    igv_sample_guids_by_individual = defaultdict(list)
    for sample in igv_samples:
        igv_sample_guids_by_individual[sample['individualGuid']].append(
            sample['sampleGuid'])
    for individual in individuals:
        individual['sampleGuids'] = sample_guids_by_individual[
            individual['individualGuid']]
        individual['igvSampleGuids'] = igv_sample_guids_by_individual[
            individual['individualGuid']]

    response = {
        'projectsByGuid': {p['projectGuid']: p
                           for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f
                           for f in families},
        'individualsByGuid': {i['individualGuid']: i
                              for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s
                          for s in samples},
        'igvSamplesByGuid': {s['sampleGuid']: s
                             for s in igv_samples},
        'locusListsByGuid': {
            ll['locusListGuid']: ll
            for ll in get_json_for_locus_lists(locus_lists, user)
        },
        'analysisGroupsByGuid':
        {ag['analysisGroupGuid']: ag
         for ag in analysis_groups},
    }
    if project_category_guid:
        response['projectCategoriesByGuid'] = {
            project_category_guid:
            ProjectCategory.objects.get(guid=project_category_guid).json()
        }
    return response
Ejemplo n.º 12
0
def _get_projects_details(projects, user, project_category_guid=None):
    """Collect the JSON for the given projects and everything attached to them.

    Args:
        projects (list): Project models; check_permissions is called for each one
        user (object): user passed to the permission check and to the JSON helpers
        project_category_guid (string): optional; when given, the JSON of that
            ProjectCategory is added under 'projectCategoriesByGuid'
    Returns:
        dictionary: 'projectsByGuid', 'familiesByGuid', 'individualsByGuid',
            'samplesByGuid', 'locusListsByGuid' and 'analysisGroupsByGuid' maps
            (plus 'projectCategoriesByGuid' when requested)
    """
    for project_model in projects:
        check_permissions(project_model, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {}
    for project_model in projects:
        project_models_by_guid[project_model.guid] = project_model
    projects_json = get_json_for_projects(projects, user)

    # Union of the locus lists of all projects, serialized once at the end
    all_locus_lists = set()
    functional_data_tag_types = get_json_for_variant_functional_data_tag_types()
    for project_json in projects_json:
        project_model = project_models_by_guid[project_json['projectGuid']]
        project_locus_lists = get_project_locus_list_models(project_model)
        all_locus_lists.update(project_locus_lists)

        project_json['locusListGuids'] = [ll.guid for ll in project_locus_lists]
        project_json['variantTagTypes'] = get_project_variant_tag_types(project_model)
        project_json['variantFunctionalTagTypes'] = functional_data_tag_types

    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(family_models, user, add_individual_guids_field=True)

    individual_models = Individual.objects.filter(family__project__in=projects)
    individuals = _get_json_for_individuals(individual_models, user=user, add_sample_guids_field=True)

    sample_models = Sample.objects.filter(individual__family__project__in=projects)
    samples = get_json_for_samples(sample_models)

    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups = get_json_for_analysis_groups(analysis_group_models)

    def _keyed_by(guid_key, json_items):
        # Index a sequence of JSON dicts by the value stored under guid_key
        return {item[guid_key]: item for item in json_items}

    response = {}
    response['projectsByGuid'] = _keyed_by('projectGuid', projects_json)
    response['familiesByGuid'] = _keyed_by('familyGuid', families)
    response['individualsByGuid'] = _keyed_by('individualGuid', individuals)
    response['samplesByGuid'] = _keyed_by('sampleGuid', samples)
    response['locusListsByGuid'] = _keyed_by(
        'locusListGuid', get_json_for_locus_lists(list(all_locus_lists), user))
    response['analysisGroupsByGuid'] = _keyed_by('analysisGroupGuid', analysis_groups)

    if project_category_guid:
        category_json = ProjectCategory.objects.get(guid=project_category_guid).json()
        response['projectCategoriesByGuid'] = {project_category_guid: category_json}
    return response
Ejemplo n.º 13
0
def receive_individuals_table_handler(request, project_guid):
    """Parse an uploaded Excel or .tsv table of individuals without saving it to the database.

    The uploaded records are parsed and validated, then handed back to the client as an
    'uploadedFileId'. If/when the client wants to 'apply' the table, it sends that id to the
    save_individuals_table(..) handler, which actually writes the data to the database.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    Returns:
        A JSON response with 'uploadedFileId', 'errors', 'warnings' and 'info' keys on success,
        or a status-400 JSON response with 'errors' and 'warnings' when parsing or validation fails.
    """

    project = get_project_and_check_pm_permissions(project_guid, request.user)

    # Filled in by the parsing callback below (mutated in place, never rebound)
    warnings = []

    def process_records(json_records, filename='ped_file'):
        parsed_records, parse_errors, parse_warnings = parse_pedigree_table(
            json_records, filename, user=request.user, project=project)
        if parse_errors:
            raise ErrorsWarningsException(parse_errors, parse_warnings)
        warnings.extend(parse_warnings)
        return parsed_records

    try:
        uploaded_file_id, filename, json_records = save_uploaded_file(
            request, process_records=process_records)
    except ErrorsWarningsException as e:
        error_body = {'errors': e.errors, 'warnings': e.warnings}
        return create_json_response(error_body, status=400, reason=e.errors)
    except Exception as e:
        message = str(e)
        error_body = {'errors': [message], 'warnings': []}
        return create_json_response(error_body, status=400, reason=message)

    if warnings:
        # Warnings may just mean the upload references valid individuals that already exist in the project,
        # or they may point at individuals that really don't exist - so re-validate the upload together with
        # the other existing members of the uploaded families
        uploaded_family_ids = {record[JsonConstants.FAMILY_ID_COLUMN] for record in json_records}
        uploaded_individual_ids = {record[JsonConstants.INDIVIDUAL_ID_COLUMN] for record in json_records}

        other_family_members = Individual.objects.filter(
            family__family_id__in=uploaded_family_ids, family__project=project,
        ).exclude(individual_id__in=uploaded_individual_ids)
        other_family_members_json = _get_json_for_individuals(
            other_family_members, project_guid=project_guid, family_fields=['family_id'])

        all_records = json_records + other_family_members_json
        errors, _ = validate_fam_file_records(all_records, fail_on_warnings=True)
        if errors:
            error_body = {'errors': errors, 'warnings': []}
            return create_json_response(error_body, status=400, reason=errors)

    # send back some stats
    # family id -> list of (individual id to look up, whether that id is a "previous" id)
    id_entries_by_family = defaultdict(list)
    for record in json_records:
        previous_id = record.get(JsonConstants.PREVIOUS_INDIVIDUAL_ID_COLUMN)
        family_entries = id_entries_by_family[record[JsonConstants.FAMILY_ID_COLUMN]]
        if previous_id:
            family_entries.append((previous_id, True))
        else:
            family_entries.append((record[JsonConstants.INDIVIDUAL_ID_COLUMN], False))

    num_individuals = sum(len(entries) for entries in id_entries_by_family.values())

    num_existing_individuals = 0
    missing_prev_ids = []
    for family_id, entries in id_entries_by_family.items():
        lookup_ids = [indiv_id for indiv_id, _ in entries]
        matching_individuals = Individual.objects.filter(
            individual_id__in=lookup_ids,
            family__family_id=family_id,
            family__project=project,
        ).only('individual_id')
        existing_ids = {individual.individual_id for individual in matching_individuals}

        num_existing_individuals += len(existing_ids)
        missing_prev_ids.extend(
            indiv_id for indiv_id, is_previous in entries
            if is_previous and indiv_id not in existing_ids
        )

    if missing_prev_ids:
        missing_message = 'Could not find individuals with the following previous IDs: {}'.format(
            ', '.join(missing_prev_ids))
        return create_json_response(
            {'errors': [missing_message], 'warnings': []}, status=400, reason='Invalid input')

    num_individuals_to_create = num_individuals - num_existing_individuals

    family_ids = {record[JsonConstants.FAMILY_ID_COLUMN] for record in json_records}
    num_families = len(family_ids)
    num_existing_families = Family.objects.filter(family_id__in=family_ids, project=project).count()
    num_families_to_create = num_families - num_existing_families

    parsed_summary = "{num_families} families, {num_individuals} individuals parsed from {filename}".format(
        num_families=num_families, num_individuals=num_individuals, filename=filename)
    created_summary = "{} new families, {} new individuals will be added to the project".format(
        num_families_to_create, num_individuals_to_create)
    updated_summary = "{} existing individuals will be updated".format(num_existing_individuals)

    response = {
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': [],
        'info': [parsed_summary, created_summary, updated_summary],
    }
    logger.info(response)
    return create_json_response(response)
Ejemplo n.º 14
0
def _get_parsed_individuals(family, project_guid=None):
    """Builds FAM-file style records for the individuals in the given family, adding placeholder
    parents wherever a child references a parent that is missing from the family.

    Args:
        family (object): seqr Family model.
        project_guid (string): passed through to _get_json_for_individuals.
    Returns:
        dictionary: maps each individual id (including generated placeholder ids) to a dict with
            'individualId', 'paternalId', 'maternalId', 'sex' and 'affected' FAM-file values,
            or None if the family has fewer than 2 individuals (in which case the family's
            pedigree_image is also reset to None).
    """
    individuals = Individual.objects.filter(family=family)

    if len(individuals) < 2:
        update_seqr_model(family, pedigree_image=None)
        return None

    # index the json records by individual id
    records_by_id = {}
    for record in _get_json_for_individuals(individuals, project_guid=project_guid, family_guid=family.guid):
        records_by_id[record['individualId']] = record

    # group children by their (paternal id, maternal id) pair, skipping individuals with no parents listed
    children_by_parent_ids = collections.defaultdict(list)
    for record in records_by_id.values():
        if record['paternalId'] or record['maternalId']:
            children_by_parent_ids[(record['paternalId'], record['maternalId'])].append(record)

    # generate placeholder individuals as needed, since HaploPainter1.043.pl doesn't support families with only 1 parent
    for parent_ids, children in children_by_parent_ids.items():
        paternal_id, maternal_id = parent_ids
        parent_specs = (
            ('paternalId', paternal_id, 'M'),
            ('maternalId', maternal_id, 'F'),
        )
        for parent_field, parent_id, parent_sex in parent_specs:
            if parent_id and parent_id in records_by_id:
                continue

            placeholder_id = 'placeholder_%s' % _random_string(10)

            # point every child in this group at the placeholder parent
            for child in children:
                child[parent_field] = placeholder_id

            records_by_id[placeholder_id] = {
                'individualId': placeholder_id,  # fake indiv id
                'paternalId': '',
                'maternalId': '',
                'sex': parent_sex,
                'affected': 'INVISIBLE',  # special value that tells HaploPainter to draw this individual as '?'
            }

    # convert to FAM file values
    fam_sex_values = {"M": "1", "F": "2", "U": "0"}
    # HaploPainter1.043.pl has been modified to hide individuals with affected-status='9'
    fam_affected_values = {"A": "2", "N": "1", "U": "0", "INVISIBLE": "9"}

    parsed_individuals = {}
    for individual_id, record in records_by_id.items():
        parsed_individuals[individual_id] = {
            'individualId': individual_id,
            'paternalId': record['paternalId'] or '0',
            'maternalId': record['maternalId'] or '0',
            'sex': fam_sex_values[record['sex']],
            'affected': fam_affected_values[record['affected']],
        }
    return parsed_individuals
Ejemplo n.º 15
0
    def handle(self, *args, **options):
        """Export variants whose mainTranscript_lof is 'HC' from elasticsearch to lof_variants.csv.

        The elasticsearch indices to search are chosen from, in order of precedence:
          * options["index"]: used directly as the indices to search
          * options["use_project_indices_csv"]: the 'index' column of an existing project_indices.csv
          * otherwise: the indices of all genome version 37 BaseProjects whose name does not contain
            any of EXCLUDE_PROJECTS. In this case project_indices.csv and seqr_individuals.csv
            are (re)written as a side effect.

        If options["metadata_only"] is set, the elasticsearch search is skipped entirely.
        """

        if options["index"]:
            es_indices = options["index"]
        elif options["use_project_indices_csv"]:
            with open('project_indices.csv') as csvfile:
                reader = csv.DictReader(csvfile)
                es_indices = {row['index'] for row in reader}

        else:
            projects_q = BaseProject.objects.filter(genome_version='37')
            for exclude_project in EXCLUDE_PROJECTS:
                projects_q = projects_q.exclude(
                    project_name__icontains=exclude_project)
            indices_for_project = defaultdict(list)
            for project in projects_q:
                indices_for_project[project.get_elasticsearch_index()].append(
                    project)
            # Drop projects that have no elasticsearch index
            indices_for_project.pop(None, None)

            seqr_projects = []
            # The csv module writes text, so the file must be opened in text mode (newline='' per the
            # csv docs); binary mode ('wb') raises a TypeError on the first write under Python 3
            with open('project_indices.csv', 'w', newline='') as csvfile:
                fieldnames = ['projectGuid', 'index']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for index, projects in indices_for_project.items():
                    for project in projects:
                        seqr_projects.append(project.seqr_project)
                        writer.writerow({
                            'projectGuid': project.seqr_project.guid,
                            'index': index
                        })

            individuals = _get_json_for_individuals(
                Individual.objects.filter(family__project__in=seqr_projects))
            with open('seqr_individuals.csv', 'w', newline='') as csvfile:
                fieldnames = [
                    'projectGuid', 'familyGuid', 'individualId', 'paternalId',
                    'maternalId', 'sex', 'affected'
                ]
                writer = csv.DictWriter(csvfile,
                                        fieldnames=fieldnames,
                                        extrasaction='ignore')
                writer.writeheader()
                for individual in individuals:
                    writer.writerow(individual)
            es_indices = list(indices_for_project)

        if not options["metadata_only"]:
            es_client = elasticsearch.Elasticsearch(
                host=settings.ELASTICSEARCH_SERVICE_HOSTNAME, timeout=10000)
            # Every index name is treated as a prefix pattern: "a*,b*,c*"
            search = elasticsearch_dsl.Search(using=es_client,
                                              index='*,'.join(es_indices) +
                                              "*")
            search = search.query("match", mainTranscript_lof='HC')
            search = search.source([
                'contig', 'pos', 'ref', 'alt', '*num_alt', '*gq', '*ab', '*dp',
                '*ad'
            ])

            print('Searching across {} indices...'.format(len(es_indices)))
            result_count_search = search.params(size=0)
            total = result_count_search.execute().hits.total
            print('Loading {} variants...'.format(total))

            # Stays 0 if the scan yields no hits, so the final summary never hits an unbound name
            num_loaded = 0
            with open('lof_variants.csv', 'a') as csvfile:
                sample_fields = ['num_alt', 'gq', 'ab', 'dp', 'ad']
                fieldnames = ['contig', 'pos', 'ref', 'alt', 'index'
                              ] + sample_fields
                writer = csv.DictWriter(csvfile,
                                        fieldnames=fieldnames,
                                        extrasaction='ignore')
                # The file is opened in append mode: when explicit indices are given the rows are
                # added to an existing file, so the header is not repeated
                if not options["index"]:
                    writer.writeheader()
                for num_loaded, hit in enumerate(search.scan(), 1):
                    result = {key: hit[key] for key in hit}
                    result['index'] = hit.meta.index
                    for field in sample_fields:
                        # Collapse the per-sample "<prefix>_<field>" keys into one JSON column keyed by
                        # the prefix. The suffix is sliced off explicitly: str.rstrip() strips a *set of
                        # characters*, which would also eat trailing characters of the prefix itself
                        suffix = '_{}'.format(field)
                        result[field] = json.dumps({
                            key[:-len(suffix)]: val
                            for key, val in result.items()
                            if key.endswith(suffix)
                        })
                    writer.writerow(result)
                    if num_loaded % 10000 == 0:
                        print('Parsed {} variants'.format(num_loaded))

            print('Loaded {} variants'.format(num_loaded))

        print('Done')
Ejemplo n.º 16
0
def edit_individuals_handler(request, project_guid):
    """Modify one or more Individual records.

    Args:
        request (object): Django HTTP Request object.
        project_guid (string): GUID of project that contains these individuals.

    Request:
        body should be a json dictionary that contains a 'individuals' list that includes the individuals to update,
         represented by dictionaries of their guid and fields to update -
        for example:
            {
                'individuals': [
                    { 'individualGuid': <individualGuid1>, 'paternalId': <paternalId>, 'affected': 'A' },
                    { 'individualGuid': <individualGuid2>, 'sex': 'U' },
                    ...
                ]
            }

    Response:
        json dictionary with the updated individual(s) and their families, keyed by guid, like:
            {
                'individualsByGuid': {
                    <individualGuid1> : { individualId: xxx, sex: xxx, affected: xxx, ...},
                    <individualGuid2> : { individualId: xxx, sex: xxx, affected: xxx, ...},
                    ...
                },
                'familiesByGuid': {
                    <familyGuid1> : { ... },
                    ...
                }
            }
        or a status-400 response if 'individuals' is missing, validation fails, or the update raises.
    """

    project = get_project_and_check_permissions(project_guid, request.user, CAN_EDIT)

    request_json = json.loads(request.body)

    modified_individuals_list = request_json.get('individuals')
    if modified_individuals_list is None:
        return create_json_response(
            {}, status=400, reason="'individuals' not specified")

    update_individuals = {ind['individualGuid']: ind for ind in modified_individuals_list}
    update_individual_models = {ind.guid: ind for ind in Individual.objects.filter(guid__in=update_individuals.keys())}
    for modified_ind in modified_individuals_list:
        model = update_individual_models[modified_ind['individualGuid']]
        # If the individual id is being changed, record the old id under the previous-id column
        if modified_ind[JsonConstants.INDIVIDUAL_ID_COLUMN] != model.individual_id:
            modified_ind[JsonConstants.PREVIOUS_INDIVIDUAL_ID_COLUMN] = model.individual_id

    # Validate against both the families the individuals are moving to and the ones they currently belong to
    modified_family_ids = {ind.get('familyId') or ind['family']['familyId'] for ind in modified_individuals_list}
    modified_family_ids.update({ind.family.family_id for ind in update_individual_models.values()})
    related_individuals = Individual.objects.filter(
        family__family_id__in=modified_family_ids, family__project=project).exclude(guid__in=update_individuals.keys())
    related_individuals_json = _get_json_for_individuals(related_individuals, project_guid=project_guid, family_fields=['family_id'])
    individuals_list = modified_individuals_list + related_individuals_json

    # TODO more validation?
    errors, warnings = validate_fam_file_records(individuals_list, fail_on_warnings=True)
    if errors:
        return create_json_response({'errors': errors, 'warnings': warnings}, status=400, reason='Invalid updates')

    try:
        updated_families, updated_individuals = add_or_update_individuals_and_families(
            project, modified_individuals_list, user=request.user
        )
    except Exception as e:
        # Exceptions have no `.message` attribute in Python 3, so reading it here would raise an
        # AttributeError and mask the original error; str(e) works for every exception type
        return create_json_response({'errors': [str(e)]}, status=400, reason='Invalid updates')

    individuals_by_guid = {
        individual.guid: _get_json_for_individual(individual, request.user) for individual in updated_individuals
    }
    families_by_guid = {
        family.guid: _get_json_for_family(family, request.user, add_individual_guids_field=True)
        for family in updated_families
    }

    return create_json_response({
        'individualsByGuid': individuals_by_guid,
        'familiesByGuid': families_by_guid,
    })
Ejemplo n.º 17
0
def anvil_export(request, project_guid):
    """Build one export row per individual, annotated with that individual's saved variants.

    Args:
        request (object): Django HTTP Request object.
        project_guid (string): GUID of the project to export, or 'all' to export every project
            that has a project category named 'anvil' (case-insensitive).

    Returns:
        JSON response of the form {'anvilRows': [<row>, ...]}. Each row is the individual's json plus
        'Project_ID', 'numSavedVariants' and, for every saved variant in the individual's family where
        the individual carries at least one alt allele, a set of '<Field>-<n>' columns (Zygosity, Chrom,
        Pos, Ref, Alt, hgvsc, hgvsp, Transcript, geneId, and Gene when the gene could be looked up),
        where n is the 1-based position of the variant in the family's saved variant list.
    """
    if project_guid == 'all':
        project_guid = None

    if project_guid:
        projects_by_guid = {project_guid: Project.objects.get(guid=project_guid)}
    else:
        projects_by_guid = {p.guid: p for p in Project.objects.filter(projectcategory__name__iexact='anvil')}

    # NOTE(review): presumably restricts to families loaded more than a year ago - confirm against the helper
    families = _get_over_year_loaded_project_families(projects_by_guid.values())
    prefetch_related_objects(families, 'individual_set')

    saved_variants_by_family = _get_saved_variants_by_family(projects_by_guid.values(), request.user)

    # Handle compound het genes
    compound_het_gene_id_by_family = {}
    for family_guid, saved_variants in saved_variants_by_family.items():
        if len(saved_variants) > 1:
            # Only variants where no genotype has 2 or more alt alleles can be part of a compound het
            potential_compound_het_variants = [
                variant for variant in saved_variants if all(gen['numAlt'] < 2 for gen in variant['genotypes'].values())
            ]
            main_gene_ids = {variant['mainTranscript']['geneId'] for variant in potential_compound_het_variants}
            if len(main_gene_ids) > 1:
                # This occurs in compound hets where some hits have a primary transcripts in different genes
                for gene_id in main_gene_ids:
                    if all(gene_id in variant['transcripts'] for variant in potential_compound_het_variants):
                        compound_het_gene_id_by_family[family_guid] = gene_id

    individuals = set()
    for family in families:
        individuals.update(family.individual_set.all())
    rows = _get_json_for_individuals(list(individuals), project_guid=project_guid, family_fields=['family_id', 'coded_phenotype'])

    gene_ids = set()
    for row in rows:
        row['Project_ID'] = projects_by_guid[row['projectGuid']].name

        saved_variants = saved_variants_by_family[row['familyGuid']]
        row['numSavedVariants'] = len(saved_variants)
        for i, variant in enumerate(saved_variants):
            genotype = variant['genotypes'].get(row['individualGuid'], {})
            if genotype.get('numAlt', -1) > 0:
                # Prefer the shared compound het gene for the family over the variant's own main gene
                gene_id = compound_het_gene_id_by_family.get(row['familyGuid']) or variant['mainTranscript']['geneId']
                gene_ids.add(gene_id)
                variant_fields = {
                    'Zygosity': 'heterozygous' if genotype['numAlt'] == 1 else 'homozygous',
                    'Chrom': variant['chrom'],
                    'Pos': variant['pos'],
                    'Ref': variant['ref'],
                    'Alt': variant['alt'],
                    'hgvsc': variant['mainTranscript']['hgvsc'],
                    'hgvsp': variant['mainTranscript']['hgvsp'],
                    'Transcript': variant['mainTranscript']['transcriptId'],
                    'geneId': gene_id,
                }
                row.update({'{}-{}'.format(k, i + 1): v for k, v in variant_fields.items()})

    genes_by_id = get_genes(gene_ids)
    for row in rows:
        # Iterate over a snapshot of the items: the loop adds 'Gene-<n>' keys to the row, and inserting
        # into a dict while iterating its live items() view raises a RuntimeError in Python 3
        for key, gene_id in list(row.items()):
            if key.startswith('geneId') and genes_by_id.get(gene_id):
                row[key.replace('geneId', 'Gene')] = genes_by_id[gene_id]['geneSymbol']

    return create_json_response({'anvilRows': rows})
Ejemplo n.º 18
0
def _get_projects_details(projects, user, project_category_guid=None):
    """Assemble the by-guid json lookups describing the given projects and everything they contain.

    Args:
        projects: the project models to describe; check_permissions is called on every one of them
            for the given user before any data is loaded.
        user: the user the json is generated for.
        project_category_guid (string): optional guid of a ProjectCategory to include in the response.
    Returns:
        dictionary: 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
            'locusListsByGuid' and 'analysisGroupsByGuid' lookups, plus 'projectCategoriesByGuid'
            when project_category_guid is given.
    """
    for project in projects:
        check_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    projects_by_guid = {p.guid: p for p in projects}
    projects_json = get_json_for_projects(projects, user)

    all_locus_lists = set()

    functional_tag_types_json = get_json_for_variant_functional_data_tag_types()

    # Tag types that belong to one of these projects, plus the ones that are not tied to any project
    tag_type_filter = Q(project__in=projects) | Q(project__isnull=True)
    tag_type_models_by_guid = {}
    for tag_type in VariantTagType.objects.filter(tag_type_filter).prefetch_related('project'):
        tag_type_models_by_guid[tag_type.guid] = tag_type
    tag_types_json = _get_json_for_models(tag_type_models_by_guid.values())

    def _applies_to_project(tag_type_json, guid):
        # A tag type applies if it has no project at all, or if its project is this one
        tag_project = tag_type_models_by_guid[tag_type_json['variantTagTypeGuid']].project
        return tag_project is None or tag_project.guid == guid

    for project_json in projects_json:
        current_guid = project_json['projectGuid']
        project_locus_lists = get_project_locus_list_models(projects_by_guid[current_guid])
        all_locus_lists.update(project_locus_lists)

        project_json.update({
            'locusListGuids': [ll.guid for ll in project_locus_lists],
            'variantTagTypes': [tt for tt in tag_types_json if _applies_to_project(tt, current_guid)],
            'variantFunctionalTagTypes': functional_tag_types_json,
        })

    families = _get_json_for_families(Family.objects.filter(project__in=projects), user)
    individuals = _get_json_for_individuals(Individual.objects.filter(family__project__in=projects), user=user)
    samples = get_json_for_samples(Sample.objects.filter(individual__family__project__in=projects))
    analysis_groups = get_json_for_analysis_groups(AnalysisGroup.objects.filter(project__in=projects))

    # Attach the guids of each family's individuals to the family json
    family_to_individual_guids = defaultdict(list)
    for individual_json in individuals:
        family_to_individual_guids[individual_json['familyGuid']].append(individual_json['individualGuid'])
    for family_json in families:
        family_json['individualGuids'] = family_to_individual_guids[family_json['familyGuid']]

    # Attach the guids of each individual's samples to the individual json
    individual_to_sample_guids = defaultdict(list)
    for sample_json in samples:
        individual_to_sample_guids[sample_json['individualGuid']].append(sample_json['sampleGuid'])
    for individual_json in individuals:
        individual_json['sampleGuids'] = individual_to_sample_guids[individual_json['individualGuid']]

    locus_lists_json = get_json_for_locus_lists(list(all_locus_lists), user)

    response = {
        'projectsByGuid': {entry['projectGuid']: entry for entry in projects_json},
        'familiesByGuid': {entry['familyGuid']: entry for entry in families},
        'individualsByGuid': {entry['individualGuid']: entry for entry in individuals},
        'samplesByGuid': {entry['sampleGuid']: entry for entry in samples},
        'locusListsByGuid': {entry['locusListGuid']: entry for entry in locus_lists_json},
        'analysisGroupsByGuid': {entry['analysisGroupGuid']: entry for entry in analysis_groups},
    }
    if project_category_guid:
        category = ProjectCategory.objects.get(guid=project_category_guid)
        response['projectCategoriesByGuid'] = {project_category_guid: category.json()}
    return response
Ejemplo n.º 19
0
    def handle(self, *args, **options):
        """Export variants whose mainTranscript_lof is 'HC' from elasticsearch to lof_variants.csv.

        The elasticsearch indices to search are chosen from, in order of precedence:
          * options["index"]: used directly as the indices to search
          * options["use_project_indices_csv"]: the 'index' column of an existing project_indices.csv
          * otherwise: the indices of all genome version 37 BaseProjects whose name does not contain
            any of EXCLUDE_PROJECTS. In this case project_indices.csv and seqr_individuals.csv
            are (re)written as a side effect.

        If options["metadata_only"] is set, the elasticsearch search is skipped entirely.
        """

        if options["index"]:
            es_indices = options["index"]
        elif options["use_project_indices_csv"]:
            with open('project_indices.csv') as csvfile:
                reader = csv.DictReader(csvfile)
                es_indices = {row['index'] for row in reader}

        else:
            projects_q = BaseProject.objects.filter(genome_version='37')
            for exclude_project in EXCLUDE_PROJECTS:
                projects_q = projects_q.exclude(project_name__icontains=exclude_project)
            indices_for_project = defaultdict(list)
            for project in projects_q:
                indices_for_project[project.get_elasticsearch_index()].append(project)
            # Drop projects that have no elasticsearch index
            indices_for_project.pop(None, None)

            seqr_projects = []
            # The csv module writes text, so the file must be opened in text mode (newline='' per the
            # csv docs); binary mode ('wb') raises a TypeError on the first write under Python 3
            with open('project_indices.csv', 'w', newline='') as csvfile:
                fieldnames = ['projectGuid', 'index']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for index, projects in indices_for_project.items():
                    for project in projects:
                        seqr_projects.append(project.seqr_project)
                        writer.writerow({'projectGuid': project.seqr_project.guid, 'index': index})

            individuals = _get_json_for_individuals(Individual.objects.filter(family__project__in=seqr_projects))
            with open('seqr_individuals.csv', 'w', newline='') as csvfile:
                fieldnames = ['projectGuid', 'familyGuid', 'individualId', 'paternalId', 'maternalId', 'sex',
                              'affected']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
                writer.writeheader()
                for individual in individuals:
                    writer.writerow(individual)
            es_indices = list(indices_for_project)

        if not options["metadata_only"]:
            es_client = elasticsearch.Elasticsearch(host=settings.ELASTICSEARCH_SERVICE_HOSTNAME, timeout=10000)
            # Every index name is treated as a prefix pattern: "a*,b*,c*"
            search = elasticsearch_dsl.Search(using=es_client, index='*,'.join(es_indices) + "*")
            search = search.query("match", mainTranscript_lof='HC')
            search = search.source(['contig', 'pos', 'ref', 'alt', '*num_alt', '*gq', '*ab', '*dp', '*ad'])

            print('Searching across {} indices...'.format(len(es_indices)))
            result_count_search = search.params(size=0)
            total = result_count_search.execute().hits.total
            print('Loading {} variants...'.format(total))

            # Stays 0 if the scan yields no hits, so the final summary never hits an unbound name
            num_loaded = 0
            with open('lof_variants.csv', 'a') as csvfile:
                sample_fields = ['num_alt', 'gq', 'ab', 'dp', 'ad']
                fieldnames = ['contig', 'pos', 'ref', 'alt', 'index'] + sample_fields
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
                # The file is opened in append mode: when explicit indices are given the rows are
                # added to an existing file, so the header is not repeated
                if not options["index"]:
                    writer.writeheader()
                for num_loaded, hit in enumerate(search.scan(), 1):
                    result = {key: hit[key] for key in hit}
                    result['index'] = hit.meta.index
                    for field in sample_fields:
                        # Collapse the per-sample "<prefix>_<field>" keys into one JSON column keyed by
                        # the prefix. The suffix is sliced off explicitly: str.rstrip() strips a *set of
                        # characters*, which would also eat trailing characters of the prefix itself
                        suffix = '_{}'.format(field)
                        result[field] = json.dumps({
                            key[:-len(suffix)]: val for key, val in result.items() if key.endswith(suffix)
                        })
                    writer.writerow(result)
                    if num_loaded % 10000 == 0:
                        print('Parsed {} variants'.format(num_loaded))

            print('Loaded {} variants'.format(num_loaded))

        print('Done')