def _retrieve_individuals(project_guid, user):
    """Retrieves individual-level metadata for the given project.

    Only the model's json fields, internal json fields, the family guid and the
    email of the last case-review modifier are loaded from the database.

    Args:
        project_guid (string): project_guid
        user (object): passed through to _get_json_for_individuals
    Returns:
        dictionary: individuals_by_guid, each entry with an empty 'sampleGuids' set
    """
    extra_fields = ['family__guid', 'case_review_status_last_modified_by__email']
    fields = Individual._meta.json_fields + Individual._meta.internal_json_fields + extra_fields

    queryset = Individual.objects.filter(family__project__guid=project_guid)
    queryset = queryset.select_related('family', 'case_review_status_last_modified_by')
    queryset = queryset.only(*fields)

    individuals_by_guid = {}
    for individual_json in _get_json_for_individuals(queryset, user=user, project_guid=project_guid):
        individual_json['sampleGuids'] = set()
        individuals_by_guid[individual_json['individualGuid']] = individual_json
    return individuals_by_guid
def anvil_export(request, project_guid):
    """Build the AnVIL export rows: one row per individual, annotated with its saved variants.

    Args:
        request (object): Django request object; the optional 'loadedBefore' GET parameter
            is forwarded to the individual lookup
        project_guid (string): GUID of a single project, or 'all' to export every project
            whose project category name matches 'anvil' (case-insensitive)
    Returns:
        JSON response of the form {'anvilRows': [row, ...]}
    """
    if project_guid == 'all':
        project_guid = None

    if project_guid:
        projects_by_guid = {project_guid: Project.objects.get(guid=project_guid)}
    else:
        projects_by_guid = {p.guid: p for p in Project.objects.filter(projectcategory__name__iexact='anvil')}

    individuals = _get_loaded_before_date_project_individuals(
        projects_by_guid.values(), loaded_before=request.GET.get('loadedBefore'))

    saved_variants_by_family = _get_saved_variants_by_family(projects_by_guid.values(), request.user)

    # Handle compound het genes
    compound_het_gene_id_by_family = {}
    for family_guid, saved_variants in saved_variants_by_family.items():
        if len(saved_variants) > 1:
            # Variants where nobody carries two alternate alleles
            potential_compound_het_variants = [
                variant for variant in saved_variants
                if all(gen['numAlt'] < 2 for gen in variant['genotypes'].values())
            ]
            main_gene_ids = {variant['mainTranscript']['geneId'] for variant in potential_compound_het_variants}
            if len(main_gene_ids) > 1:
                # This occurs in compound hets where some hits have a primary transcripts in different genes
                for gene_id in main_gene_ids:
                    if all(gene_id in variant['transcripts'] for variant in potential_compound_het_variants):
                        compound_het_gene_id_by_family[family_guid] = gene_id

    rows = _get_json_for_individuals(
        list(individuals), project_guid=project_guid, family_fields=['family_id', 'coded_phenotype'])

    gene_ids = set()
    for row in rows:
        row['Project_ID'] = projects_by_guid[row['projectGuid']].name

        saved_variants = saved_variants_by_family[row['familyGuid']]
        row['numSavedVariants'] = len(saved_variants)
        for i, variant in enumerate(saved_variants):
            genotype = variant['genotypes'].get(row['individualGuid'], {})
            # Only report variants this individual actually carries
            if genotype.get('numAlt', -1) > 0:
                gene_id = compound_het_gene_id_by_family.get(row['familyGuid']) or variant['mainTranscript']['geneId']
                gene_ids.add(gene_id)
                variant_fields = {
                    'Zygosity': 'heterozygous' if genotype['numAlt'] == 1 else 'homozygous',
                    'Chrom': variant['chrom'],
                    'Pos': variant['pos'],
                    'Ref': variant['ref'],
                    'Alt': variant['alt'],
                    'hgvsc': variant['mainTranscript']['hgvsc'],
                    'hgvsp': variant['mainTranscript']['hgvsp'],
                    'Transcript': variant['mainTranscript']['transcriptId'],
                    'geneId': gene_id,
                }
                # Column names are suffixed with the 1-based position of the variant in the family's list
                row.update({'{}-{}'.format(k, i + 1): v for k, v in variant_fields.items()})

    genes_by_id = get_genes(gene_ids)
    for row in rows:
        # Iterate over a snapshot: 'Gene-N' keys are added to the row inside the loop, and
        # inserting into a dict while iterating its live items() view raises RuntimeError on Python 3.
        for key, gene_id in list(row.items()):
            if key.startswith('geneId') and genes_by_id.get(gene_id):
                row[key.replace('geneId', 'Gene')] = genes_by_id[gene_id]['geneSymbol']

    return create_json_response({'anvilRows': rows})
def save_individuals_metadata_table_handler(request, project_guid, upload_file_id):
    """Handler for 'save' requests to apply individual metadata tables previously uploaded
    through receive_individuals_metadata_handler

    Args:
        request (object): Django request object
        project_guid (string): project GUID
        upload_file_id (string): id of the previously uploaded file to load the records from
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    json_records, _ = load_uploaded_file(upload_file_id)

    requested_guids = []
    for record in json_records:
        requested_guids.append(record[INDIVIDUAL_GUID_COL])

    # Only individuals that belong to this project are looked up
    individuals_by_guid = {}
    for individual_model in Individual.objects.filter(family__project=project, guid__in=requested_guids):
        individuals_by_guid[individual_model.guid] = individual_model

    for record in json_records:
        individual_model = individuals_by_guid[record[INDIVIDUAL_GUID_COL]]
        # Restrict the update to known metadata fields that are present in the record
        metadata_json = {}
        for field in INDIVIDUAL_METADATA_FIELDS.keys():
            if field in record:
                metadata_json[field] = record[field]
        update_model_from_json(individual_model, metadata_json, user=request.user)

    updated_individuals_json = _get_json_for_individuals(
        list(individuals_by_guid.values()), user=request.user, add_hpo_details=True,
    )
    response_individuals = {}
    for individual_json in updated_individuals_json:
        response_individuals[individual_json['individualGuid']] = individual_json

    return create_json_response({'individualsByGuid': response_individuals})
def _retrieve_individuals(project_guid, user):
    """Retrieves individual-level metadata for the given project.

    Args:
        project_guid (string): project_guid
        user (object): passed through to _get_json_for_individuals
    Returns:
        tuple: (individuals_by_guid dictionary, queryset of the project's Individual models).
            Every individual json gets empty 'sampleGuids' and 'igvSampleGuids' sets and a
            'mmeSubmissionGuid' of None.
    """
    project_individuals = Individual.objects.filter(family__project__guid=project_guid)

    individuals_json = _get_json_for_individuals(
        project_individuals, user=user, project_guid=project_guid, add_hpo_details=True)

    individuals_by_guid = {}
    for individual_json in individuals_json:
        individual_json.update({
            'sampleGuids': set(),
            'igvSampleGuids': set(),
            'mmeSubmissionGuid': None,
        })
        individuals_by_guid[individual_json['individualGuid']] = individual_json

    return individuals_by_guid, project_individuals
def saved_variants(request, tag):
    """Return every saved variant with the given project-independent tag, plus related entities.

    Args:
        request (object): Django request object
        tag (string): name of a VariantTagType that is not bound to a project
    Returns:
        JSON response with savedVariantsByGuid, genesById, projectsByGuid, familiesByGuid,
        individualsByGuid and locusListsByGuid
    """
    global_tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
    variant_models = SavedVariant.objects.filter(
        varianttag__variant_tag_type=global_tag_type, family__isnull=False)
    variants_json = get_json_for_saved_variants(
        variant_models, add_tags=True, add_details=True, user=request.user)

    projects_by_guid = {model.project.guid: model.project for model in variant_models}
    family_models = {model.family for model in variant_models}
    individual_models = Individual.objects.filter(family__in=family_models)

    genes = _saved_variant_genes(variants_json)
    locus_list_guids = _add_locus_lists(projects_by_guid.values(), variants_json, genes)

    projects_json = get_json_for_projects(
        projects_by_guid.values(), user=request.user, add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    for project_json in projects_json:
        project_model = projects_by_guid[project_json['projectGuid']]
        project_fields = {
            'locusListGuids': locus_list_guids,
            'variantTagTypes': get_project_variant_tag_types(project_model),
            'variantFunctionalTagTypes': functional_tag_types,
        }
        project_json.update(project_fields)

    families_json = _get_json_for_families(
        list(family_models), user=request.user, add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individual_models, user=request.user)

    locus_lists_json = get_json_for_locus_lists(
        LocusList.objects.filter(guid__in=locus_list_guids), request.user)
    locus_lists_by_guid = {}
    for locus_list_json in locus_lists_json:
        locus_lists_by_guid[locus_list_json['locusListGuid']] = locus_list_json

    response = {
        'savedVariantsByGuid': {v['variantGuid']: v for v in variants_json},
        'genesById': genes,
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families_json},
        'individualsByGuid': {i['individualGuid']: i for i in individuals_json},
        'locusListsByGuid': locus_lists_by_guid,
    }
    return create_json_response(response)
def save_individuals_table_handler(request, project_guid, upload_file_id):
    """Handler for 'save' requests to apply Individual tables previously uploaded through
    receive_individuals_table(..)

    Args:
        request (object): Django request object
        project_guid (string): project GUID
        upload_file_id (string): a token sent to the client by receive_individuals_table(..)
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    records = load_uploaded_file(upload_file_id)

    updated_families, updated_individuals = add_or_update_individuals_and_families(
        project, individual_records=records, user=request.user
    )

    # Serialize the touched individuals first, then their families
    individuals_by_guid = {}
    for individual_json in _get_json_for_individuals(
            updated_individuals, request.user, add_sample_guids_field=True):
        individuals_by_guid[individual_json['individualGuid']] = individual_json

    families_by_guid = {}
    for family_json in _get_json_for_families(
            updated_families, request.user, add_individual_guids_field=True):
        families_by_guid[family_json['familyGuid']] = family_json

    return create_json_response({
        'individualsByGuid': individuals_by_guid,
        'familiesByGuid': families_by_guid,
    })
def save_hpo_table_handler(request, project_guid, upload_file_id):
    """Handler for 'save' requests to apply HPO terms tables previously uploaded through
    receive_hpo_table_handler

    Args:
        request (object): Django request object
        project_guid (string): project GUID
        upload_file_id (string): id of the previously uploaded file to load the records from
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    json_records, _ = load_uploaded_file(upload_file_id)

    requested_guids = []
    for record in json_records:
        requested_guids.append(record[INDIVIDUAL_GUID_COLUMN])

    # Only individuals that belong to this project are looked up
    individuals_by_guid = {}
    for individual_model in Individual.objects.filter(family__project=project, guid__in=requested_guids):
        individuals_by_guid[individual_model.guid] = individual_model

    for record in json_records:
        individual_model = individuals_by_guid[record[INDIVIDUAL_GUID_COLUMN]]
        present_terms = [{'id': term} for term in record[HPO_TERMS_PRESENT_COLUMN]]
        absent_terms = [{'id': term} for term in record[HPO_TERMS_ABSENT_COLUMN]]
        individual_model.features = present_terms
        individual_model.absent_features = absent_terms
        individual_model.save()

    updated_individuals_json = _get_json_for_individuals(
        individuals_by_guid.values(), user=request.user, add_hpo_details=True,
    )
    response_individuals = {}
    for individual_json in updated_individuals_json:
        response_individuals[individual_json['individualGuid']] = individual_json

    return create_json_response({'individualsByGuid': response_individuals})
def saved_variants_page(request, tag):
    """Return tagged saved variants from the projects the user can view, plus related entities.

    Args:
        request (object): Django request object; the optional 'gene' GET parameter restricts
            the variants to those with a transcript in that gene
        tag (string): name of a project-independent VariantTagType, or 'ALL' for every
            variant that has at least one tag
    Returns:
        JSON response with the saved variant data plus genesById, projectsByGuid,
        familiesByGuid, individualsByGuid and locusListsByGuid; a 400 response when no gene
        is given and more than MAX_SAVED_VARIANTS variants match
    """
    gene = request.GET.get('gene')

    if tag != 'ALL':
        tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
        variant_models = SavedVariant.objects.filter(varianttag__variant_tag_type=tag_type)
    else:
        variant_models = SavedVariant.objects.exclude(varianttag=None)

    viewable_projects = get_projects_user_can_view(request.user)
    variant_models = variant_models.filter(family__project__in=viewable_projects)

    if gene:
        variant_models = variant_models.filter(saved_variant_json__transcripts__has_key=gene)
    elif variant_models.count() > MAX_SAVED_VARIANTS:
        return create_json_response({'error': 'Select a gene to filter variants'}, status=400)

    prefetch_related_objects(variant_models, 'family__project')
    response_json = get_json_for_saved_variants_with_tags(
        variant_models, add_details=True, include_missing_variants=True)

    projects_by_guid = {model.family.project.guid: model.family.project for model in variant_models}
    family_models = {model.family for model in variant_models}
    individual_models = Individual.objects.filter(family__in=family_models)

    variants_json = list(response_json['savedVariantsByGuid'].values())
    genes = saved_variant_genes(variants_json)
    locus_lists_by_guid = _add_locus_lists(
        list(projects_by_guid.values()), genes, include_all_lists=True)
    projects_json = get_json_for_projects(
        list(projects_by_guid.values()), user=request.user, add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    # Tag types of the involved projects plus the ones not bound to any project
    tag_type_models = VariantTagType.objects.filter(
        Q(project__in=projects_by_guid.values()) | Q(project__isnull=True))
    prefetch_related_objects(tag_type_models, 'project')
    tag_types_json = _get_json_for_models(tag_type_models)
    project_guid_by_tag_guid = {}
    for tag_type_model in tag_type_models:
        if tag_type_model.project:
            project_guid_by_tag_guid[tag_type_model.guid] = tag_type_model.project.guid

    for project_json in projects_json:
        project_guid = project_json['projectGuid']
        # A tag type without a project defaults to the current project, i.e. it is always kept
        project_tag_types = []
        for tag_type_json in tag_types_json:
            owner_guid = project_guid_by_tag_guid.get(tag_type_json['variantTagTypeGuid'], project_guid)
            if owner_guid == project_guid:
                project_tag_types.append(tag_type_json)
        project_json.update({
            'locusListGuids': list(locus_lists_by_guid.keys()),
            'variantTagTypes': sorted(project_tag_types, key=lambda tag_type_json: tag_type_json['order'] or 0),
            'variantFunctionalTagTypes': functional_tag_types,
        })

    families_json = _get_json_for_families(
        list(family_models), user=request.user, add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(
        individual_models, add_hpo_details=True, user=request.user)

    locus_list_models = LocusList.objects.filter(guid__in=locus_lists_by_guid.keys())
    for locus_list_json in get_json_for_locus_lists(locus_list_models, request.user):
        locus_lists_by_guid[locus_list_json['locusListGuid']].update(locus_list_json)

    response_json.update({
        'genesById': genes,
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families_json},
        'individualsByGuid': {i['individualGuid']: i for i in individuals_json},
        'locusListsByGuid': locus_lists_by_guid,
    })

    return create_json_response(response_json)
def _get_parsed_individuals(family, project_guid=None):
    """Converts the individuals of a family into FAM-file style records for HaploPainter.

    Families with fewer than 2 individuals get their pedigree image cleared instead.

    Args:
        family (object): seqr Family model.
        project_guid (string): passed through to _get_json_for_individuals
    Returns:
        list: one dictionary per individual (including generated placeholder parents), sorted
            by individual id, or None if the family has fewer than 2 individuals
    """
    family_individuals = Individual.objects.filter(family=family)
    if len(family_individuals) < 2:
        family.pedigree_image = None
        family.save()
        return None

    # convert individuals to json, keyed by individual id
    records_by_id = {}
    for record in _get_json_for_individuals(
            family_individuals, project_guid=project_guid, family_guid=family.guid):
        records_by_id[record['individualId']] = record

    # group children by their (father, mother) id pair, skipping individuals without any parent id
    children_by_parents = collections.defaultdict(list)
    for record in records_by_id.values():
        father_id = record['paternalId']
        mother_id = record['maternalId']
        if father_id or mother_id:
            children_by_parents[(father_id, mother_id)].append(record)

    # generate placeholder individuals as needed, since HaploPainter1.043.pl doesn't support families with only 1 parent
    for (father_id, mother_id), children in children_by_parents.items():
        for parent_key, parent_id, parent_sex in (('paternalId', father_id, 'M'), ('maternalId', mother_id, 'F')):
            if parent_id and parent_id in records_by_id:
                continue
            placeholder_id = 'placeholder_%s' % _random_string(10)
            for child in children:
                child[parent_key] = placeholder_id
            records_by_id[placeholder_id] = {
                'individualId': placeholder_id,  # fake indiv id
                'paternalId': '',
                'maternalId': '',
                'sex': parent_sex,
                'affected': 'INVISIBLE',  # use a special value to tell HaploPainter to draw this individual as '?'
            }

    # convert to FAM file values
    sex_codes = {"M": "1", "F": "2", "U": "0"}
    # HaploPainter1.043.pl has been modified to hide individuals with affected-status='9'
    affected_codes = {"A": "2", "N": "1", "U": "0", "INVISIBLE": "9"}

    fam_records = []
    for individual_id in sorted(records_by_id):
        record = records_by_id[individual_id]
        fam_records.append({
            'individualId': individual_id,
            'paternalId': record['paternalId'] or '0',
            'maternalId': record['maternalId'] or '0',
            'sex': sex_codes[record['sex']],
            'affected': affected_codes[record['affected']],
        })
    return fam_records
def saved_variants_page(request, tag):
    """Return every saved variant with the given project-independent tag, plus related entities.

    Args:
        request (object): Django request object; the optional 'gene' GET parameter restricts
            the variants to those with a transcript in that gene
        tag (string): name of a VariantTagType that is not bound to a project
    Returns:
        JSON response with savedVariantsByGuid, genesById, projectsByGuid, familiesByGuid,
        individualsByGuid and locusListsByGuid; a 400 response when no gene is given and more
        than 10000 variants match
    """
    gene = request.GET.get('gene')
    tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
    saved_variant_models = SavedVariant.objects.filter(varianttag__variant_tag_type=tag_type)
    if gene:
        saved_variant_models = saved_variant_models.filter(saved_variant_json__transcripts__has_key=gene)

    # Test `gene` first so the COUNT query is only issued when it can affect the outcome
    if not gene and saved_variant_models.count() > 10000:
        return create_json_response({'message': 'Select a gene to filter variants'}, status=400)

    prefetch_related_objects(saved_variant_models, 'family__project')
    saved_variants = get_json_for_saved_variants(saved_variant_models, add_tags=True, add_details=True)

    project_models_by_guid = {
        variant.family.project.guid: variant.family.project for variant in saved_variant_models
    }
    families = {variant.family for variant in saved_variant_models}
    individuals = Individual.objects.filter(family__in=families)

    genes = _saved_variant_genes(saved_variants)
    locus_list_guids = _add_locus_lists(project_models_by_guid.values(), saved_variants, genes)
    projects_json = get_json_for_projects(
        project_models_by_guid.values(), user=request.user, add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    # Tag types of the involved projects plus the ones not bound to any project
    variant_tag_types = VariantTagType.objects.filter(
        Q(project__in=project_models_by_guid.values()) | Q(project__isnull=True))
    prefetch_related_objects(variant_tag_types, 'project')
    variant_tags_json = _get_json_for_models(variant_tag_types)
    tag_projects = {vt.guid: vt.project.guid for vt in variant_tag_types if vt.project}

    for project_json in projects_json:
        project_guid = project_json['projectGuid']
        # A tag type without a project defaults to the current project, i.e. it is always kept
        project_variant_tags = [
            vt for vt in variant_tags_json
            if tag_projects.get(vt['variantTagTypeGuid'], project_guid) == project_guid
        ]
        project_json.update({
            'locusListGuids': locus_list_guids,
            # A missing order is sorted as 0: comparing None with an int raises TypeError on Python 3
            'variantTagTypes': sorted(
                project_variant_tags, key=lambda variant_tag_type: variant_tag_type['order'] or 0),
            'variantFunctionalTagTypes': functional_tag_types,
        })

    families_json = _get_json_for_families(list(families), user=request.user, add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individuals, user=request.user)
    locus_lists_by_guid = {
        locus_list['locusListGuid']: locus_list
        for locus_list in get_json_for_locus_lists(
            LocusList.objects.filter(guid__in=locus_list_guids), request.user)
    }

    return create_json_response({
        'savedVariantsByGuid': {variant['variantGuid']: variant for variant in saved_variants},
        'genesById': genes,
        'projectsByGuid': {project['projectGuid']: project for project in projects_json},
        'familiesByGuid': {family['familyGuid']: family for family in families_json},
        'individualsByGuid': {indiv['individualGuid']: indiv for indiv in individuals_json},
        'locusListsByGuid': locus_lists_by_guid,
    })
def _get_projects_details(projects, user, project_category_guid=None):
    """Collects the json for the given projects and all of their nested entities.

    Args:
        projects (list): Project models; indexed and measured with len(), so a sequence is expected
        user (object): user whose permissions are checked on every project
        project_category_guid (string): optional guid of a ProjectCategory to include in the response
    Returns:
        dictionary: projectsByGuid, familiesByGuid, individualsByGuid, samplesByGuid,
            igvSamplesByGuid, locusListsByGuid and analysisGroupsByGuid, plus
            projectCategoriesByGuid when project_category_guid is given
    """
    for project in projects:
        check_project_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {project.guid: project for project in projects}
    projects_json = get_json_for_projects(projects, user)

    locus_lists = LocusList.objects.filter(
        projects__in=projects).prefetch_related('projects')

    # Only set when exactly one project is requested; otherwise guids are resolved per entity below
    project_guid = projects[0].guid if len(projects) == 1 else None

    functional_data_tag_types = get_json_for_variant_functional_data_tag_types()

    # Tag types of the given projects plus the ones not bound to any project
    variant_tag_types_by_guid = {
        vtt.guid: vtt
        for vtt in VariantTagType.objects.filter(
            Q(project__in=projects) | Q(project__isnull=True)).prefetch_related('project')
    }
    variant_tag_types = _get_json_for_models(
        list(variant_tag_types_by_guid.values()))
    for project_json in projects_json:
        project = project_models_by_guid[project_json['projectGuid']]
        project_json.update({
            'locusListGuids': [
                locus_list.guid for locus_list in locus_lists
                if project in locus_list.projects.all()
            ],
            # Keep tag types without a project and the ones owned by this project
            'variantTagTypes': [
                vtt for vtt in variant_tag_types
                if variant_tag_types_by_guid[vtt['variantTagTypeGuid']].project is None
                or variant_tag_types_by_guid[vtt['variantTagTypeGuid']].project.guid == project_json['projectGuid']
            ],
            'variantFunctionalTagTypes': functional_data_tag_types,
        })

    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(family_models, user, project_guid=project_guid, skip_nested=True)

    individual_models = Individual.objects.filter(family__in=family_models)
    individuals = _get_json_for_individuals(
        individual_models, user=user, project_guid=project_guid, add_hpo_details=True, skip_nested=True)

    sample_models = Sample.objects.filter(individual__in=individual_models)
    samples = get_json_for_samples(sample_models, project_guid=project_guid, skip_nested=True)

    igv_sample_models = IgvSample.objects.filter(
        individual__in=individual_models)
    igv_samples = get_json_for_samples(igv_sample_models, project_guid=project_guid, skip_nested=True)

    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups = get_json_for_analysis_groups(analysis_group_models, project_guid=project_guid, skip_nested=True)

    if not project_guid:
        # NOTE(review): presumably skip_nested without a project_guid makes the serializers emit
        # 'projectId' / 'familyId' / 'individualId' database ids instead of guids - confirm.
        # The ids are popped here and replaced with the matching guids.
        project_id_to_guid = {project.id: project.guid for project in projects}
        family_id_to_guid = {
            family.id: family.guid for family in family_models
        }
        individual_id_to_guid = {
            individual.id: individual.guid for individual in individual_models
        }
        family_guid_to_project_guid = {}
        individual_guid_to_project_guid = {}
        for family in families:
            # project_guid is reused as a scratch variable from here on; it is not read again
            # after this `if` block
            project_guid = project_id_to_guid[family.pop('projectId')]
            family['projectGuid'] = project_guid
            family_guid_to_project_guid[family['familyGuid']] = project_guid
        for individual in individuals:
            family_guid = family_id_to_guid[individual.pop('familyId')]
            project_guid = family_guid_to_project_guid[family_guid]
            individual['familyGuid'] = family_guid
            individual['projectGuid'] = project_guid
            individual_guid_to_project_guid[
                individual['individualGuid']] = project_guid
        for sample in samples:
            individual_guid = individual_id_to_guid[sample.pop('individualId')]
            sample['individualGuid'] = individual_guid
            sample['projectGuid'] = individual_guid_to_project_guid[
                individual_guid]
        for sample in igv_samples:
            individual_guid = individual_id_to_guid[sample.pop('individualId')]
            sample['individualGuid'] = individual_guid
            sample['projectGuid'] = individual_guid_to_project_guid[
                individual_guid]
        for group in analysis_groups:
            group['projectGuid'] = project_id_to_guid[group.pop('projectId')]

    # Attach the guids of the nested entities to their parents
    individual_guids_by_family = defaultdict(list)
    for individual in individuals:
        individual_guids_by_family[individual['familyGuid']].append(
            individual['individualGuid'])
    for family in families:
        family['individualGuids'] = individual_guids_by_family[
            family['familyGuid']]

    sample_guids_by_individual = defaultdict(list)
    for sample in samples:
        sample_guids_by_individual[sample['individualGuid']].append(
            sample['sampleGuid'])
    igv_sample_guids_by_individual = defaultdict(list)
    for sample in igv_samples:
        igv_sample_guids_by_individual[sample['individualGuid']].append(
            sample['sampleGuid'])
    for individual in individuals:
        individual['sampleGuids'] = sample_guids_by_individual[
            individual['individualGuid']]
        individual['igvSampleGuids'] = igv_sample_guids_by_individual[
            individual['individualGuid']]

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'igvSamplesByGuid': {s['sampleGuid']: s for s in igv_samples},
        'locusListsByGuid': {
            ll['locusListGuid']: ll for ll in get_json_for_locus_lists(locus_lists, user)
        },
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }
    if project_category_guid:
        response['projectCategoriesByGuid'] = {
            project_category_guid: ProjectCategory.objects.get(guid=project_category_guid).json()
        }
    return response
def _get_projects_details(projects, user, project_category_guid=None):
    """Collects the json for the given projects and all of their nested entities.

    Args:
        projects (list): Project models
        user (object): user whose permissions are checked on every project
        project_category_guid (string): optional guid of a ProjectCategory to include in the response
    Returns:
        dictionary: projectsByGuid, familiesByGuid, individualsByGuid, samplesByGuid,
            locusListsByGuid and analysisGroupsByGuid, plus projectCategoriesByGuid when
            project_category_guid is given
    """
    for project_model in projects:
        check_permissions(project_model, user)

    prefetch_related_objects(projects, 'can_view_group')
    models_by_guid = {}
    for project_model in projects:
        models_by_guid[project_model.guid] = project_model
    projects_json = get_json_for_projects(projects, user)

    all_locus_lists = set()
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    for project_json in projects_json:
        project_model = models_by_guid[project_json['projectGuid']]
        project_locus_lists = get_project_locus_list_models(project_model)
        all_locus_lists.update(project_locus_lists)
        project_json.update({
            'locusListGuids': [locus_list.guid for locus_list in project_locus_lists],
            'variantTagTypes': get_project_variant_tag_types(project_model),
            'variantFunctionalTagTypes': functional_tag_types,
        })

    family_models = Family.objects.filter(project__in=projects)
    families_json = _get_json_for_families(family_models, user, add_individual_guids_field=True)

    individual_models = Individual.objects.filter(family__project__in=projects)
    individuals_json = _get_json_for_individuals(individual_models, user=user, add_sample_guids_field=True)

    sample_models = Sample.objects.filter(individual__family__project__in=projects)
    samples_json = get_json_for_samples(sample_models)

    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups_json = get_json_for_analysis_groups(analysis_group_models)

    locus_lists_json = get_json_for_locus_lists(list(all_locus_lists), user)

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families_json},
        'individualsByGuid': {i['individualGuid']: i for i in individuals_json},
        'samplesByGuid': {s['sampleGuid']: s for s in samples_json},
        'locusListsByGuid': {ll['locusListGuid']: ll for ll in locus_lists_json},
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups_json},
    }

    if project_category_guid:
        category_json = ProjectCategory.objects.get(guid=project_category_guid).json()
        response['projectCategoriesByGuid'] = {project_category_guid: category_json}

    return response
def receive_individuals_table_handler(request, project_guid):
    """Handler for the initial upload of an Excel or .tsv table of individuals.

    The records are parsed and validated but not saved to the database. They are stored in a
    temporary file and an 'uploadedFileId' for that file is sent back to the client, which can
    later pass it to the save_individuals_table(..) handler to actually apply the table.

    Args:
        request (object): Django request object
        project_guid (string): project GUID
    Returns:
        JSON response with 'uploadedFileId', 'errors', 'warnings' and 'info' on success, or a
        400 response with 'errors' and 'warnings' when parsing or validation fails
    """
    project = get_project_and_check_pm_permissions(project_guid, request.user)

    # Filled in by the parsing callback below
    warnings = []

    def process_records(json_records, filename='ped_file'):
        pedigree_records, errors, ped_warnings = parse_pedigree_table(
            json_records, filename, user=request.user, project=project)
        if errors:
            raise ErrorsWarningsException(errors, ped_warnings)
        warnings.extend(ped_warnings)
        return pedigree_records

    try:
        uploaded_file_id, filename, json_records = save_uploaded_file(request, process_records=process_records)
    except ErrorsWarningsException as e:
        error_json = {'errors': e.errors, 'warnings': e.warnings}
        return create_json_response(error_json, status=400, reason=e.errors)
    except Exception as e:
        error_json = {'errors': [str(e)], 'warnings': []}
        return create_json_response(error_json, status=400, reason=str(e))

    if warnings:
        # If there are warnings, it might be because the upload referenced valid existing individuals and there is no
        # issue, or because it referenced individuals that actually don't exist, so re-validate with all individuals
        uploaded_family_ids = {record[JsonConstants.FAMILY_ID_COLUMN] for record in json_records}
        uploaded_individual_ids = {record[JsonConstants.INDIVIDUAL_ID_COLUMN] for record in json_records}

        related_individuals = Individual.objects.filter(
            family__family_id__in=uploaded_family_ids, family__project=project,
        ).exclude(individual_id__in=uploaded_individual_ids)
        related_individuals_json = _get_json_for_individuals(
            related_individuals, project_guid=project_guid, family_fields=['family_id'])

        errors, _ = validate_fam_file_records(json_records + related_individuals_json, fail_on_warnings=True)
        if errors:
            return create_json_response({'errors': errors, 'warnings': []}, status=400, reason=errors)

    # send back some stats
    # family id -> list of (individual id to look up, whether it is a previous id)
    individual_ids_by_family = defaultdict(list)
    for record in json_records:
        previous_id = record.get(JsonConstants.PREVIOUS_INDIVIDUAL_ID_COLUMN)
        if previous_id:
            lookup = (previous_id, True)
        else:
            lookup = (record[JsonConstants.INDIVIDUAL_ID_COLUMN], False)
        individual_ids_by_family[record[JsonConstants.FAMILY_ID_COLUMN]].append(lookup)

    num_individuals = sum(len(lookups) for lookups in individual_ids_by_family.values())
    num_existing_individuals = 0
    missing_prev_ids = []
    for family_id, lookups in individual_ids_by_family.items():
        existing_models = Individual.objects.filter(
            individual_id__in=[indiv_id for (indiv_id, _) in lookups],
            family__family_id=family_id,
            family__project=project,
        ).only('individual_id')
        existing_ids = {model.individual_id for model in existing_models}
        num_existing_individuals += len(existing_ids)
        missing_prev_ids.extend(
            indiv_id for (indiv_id, is_previous) in lookups
            if is_previous and indiv_id not in existing_ids
        )
    num_individuals_to_create = num_individuals - num_existing_individuals

    if missing_prev_ids:
        message = 'Could not find individuals with the following previous IDs: {}'.format(
            ', '.join(missing_prev_ids))
        return create_json_response(
            {'errors': [message], 'warnings': []}, status=400, reason='Invalid input')

    family_ids = set(record[JsonConstants.FAMILY_ID_COLUMN] for record in json_records)
    num_families = len(family_ids)
    num_existing_families = Family.objects.filter(family_id__in=family_ids, project=project).count()
    num_families_to_create = num_families - num_existing_families

    info = [
        "{num_families} families, {num_individuals} individuals parsed from {filename}".format(
            num_families=num_families, num_individuals=num_individuals, filename=filename),
        "{} new families, {} new individuals will be added to the project".format(
            num_families_to_create, num_individuals_to_create),
        "{} existing individuals will be updated".format(num_existing_individuals),
    ]

    response = {
        'uploadedFileId': uploaded_file_id,
        'errors': [],
        'warnings': [],
        'info': info,
    }
    logger.info(response)
    return create_json_response(response)
def _get_parsed_individuals(family, project_guid=None):
    """Converts the individuals of a family into FAM-file style records for HaploPainter.

    Families with fewer than 2 individuals get their pedigree image cleared instead.

    Args:
        family (object): seqr Family model.
        project_guid (string): passed through to _get_json_for_individuals
    Returns:
        dictionary: individual id -> FAM-file style record (including generated placeholder
            parents), or None if the family has fewer than 2 individuals
    """
    family_individuals = Individual.objects.filter(family=family)
    if len(family_individuals) < 2:
        update_seqr_model(family, pedigree_image=None)
        return None

    # convert individuals to json, keyed by individual id
    records_by_id = {}
    for record in _get_json_for_individuals(
            family_individuals, project_guid=project_guid, family_guid=family.guid):
        records_by_id[record['individualId']] = record

    # group children by their (father, mother) id pair, skipping individuals without any parent id
    children_by_parents = collections.defaultdict(list)
    for record in records_by_id.values():
        father_id = record['paternalId']
        mother_id = record['maternalId']
        if father_id or mother_id:
            children_by_parents[(father_id, mother_id)].append(record)

    # generate placeholder individuals as needed, since HaploPainter1.043.pl doesn't support families with only 1 parent
    for (father_id, mother_id), children in children_by_parents.items():
        for parent_key, parent_id, parent_sex in (('paternalId', father_id, 'M'), ('maternalId', mother_id, 'F')):
            if parent_id and parent_id in records_by_id:
                continue
            placeholder_id = 'placeholder_%s' % _random_string(10)
            for child in children:
                child[parent_key] = placeholder_id
            records_by_id[placeholder_id] = {
                'individualId': placeholder_id,  # fake indiv id
                'paternalId': '',
                'maternalId': '',
                'sex': parent_sex,
                'affected': 'INVISIBLE',  # use a special value to tell HaploPainter to draw this individual as '?'
            }

    # convert to FAM file values
    sex_codes = {"M": "1", "F": "2", "U": "0"}
    # HaploPainter1.043.pl has been modified to hide individuals with affected-status='9'
    affected_codes = {"A": "2", "N": "1", "U": "0", "INVISIBLE": "9"}

    parsed_individuals = {}
    for individual_id, record in records_by_id.items():
        parsed_individuals[individual_id] = {
            'individualId': individual_id,
            'paternalId': record['paternalId'] or '0',
            'maternalId': record['maternalId'] or '0',
            'sex': sex_codes[record['sex']],
            'affected': affected_codes[record['affected']],
        }
    return parsed_individuals
def handle(self, *args, **options):
    """Write project/individual metadata and matching variants to CSV files.

    The elasticsearch indices to search are taken from, in order of precedence:
    options["index"], the 'index' column of an existing project_indices.csv
    (when options["use_project_indices_csv"] is set), or the indices of all
    genome version 37 BaseProjects not matched by EXCLUDE_PROJECTS. In the last
    case project_indices.csv and seqr_individuals.csv are (re)written.

    Unless options["metadata_only"] is set, variants whose mainTranscript_lof
    is 'HC' are appended to lof_variants.csv, with the per-sample fields
    collapsed into one JSON object per field keyed by the sample prefix.
    """
    if options["index"]:
        es_indices = options["index"]
    elif options["use_project_indices_csv"]:
        with open('project_indices.csv') as csvfile:
            reader = csv.DictReader(csvfile)
            es_indices = {row['index'] for row in reader}
    else:
        projects_q = BaseProject.objects.filter(genome_version='37')
        for exclude_project in EXCLUDE_PROJECTS:
            projects_q = projects_q.exclude(
                project_name__icontains=exclude_project)
        indices_for_project = defaultdict(list)
        for project in projects_q:
            indices_for_project[project.get_elasticsearch_index()].append(
                project)
        # projects without an index are grouped under None; drop them
        indices_for_project.pop(None, None)

        seqr_projects = []
        # NOTE(review): binary mode suits the Python 2 csv module; Python 3 would
        # need text mode with newline='' - confirm the target interpreter.
        with open('project_indices.csv', 'wb') as csvfile:
            fieldnames = ['projectGuid', 'index']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for index, projects in indices_for_project.items():
                for project in projects:
                    seqr_projects.append(project.seqr_project)
                    writer.writerow({
                        'projectGuid': project.seqr_project.guid,
                        'index': index
                    })

        individuals = _get_json_for_individuals(
            Individual.objects.filter(family__project__in=seqr_projects))
        with open('seqr_individuals.csv', 'wb') as csvfile:
            fieldnames = [
                'projectGuid', 'familyGuid', 'individualId', 'paternalId',
                'maternalId', 'sex', 'affected'
            ]
            writer = csv.DictWriter(csvfile,
                                    fieldnames=fieldnames,
                                    extrasaction='ignore')
            writer.writeheader()
            for individual in individuals:
                writer.writerow(individual)
        es_indices = indices_for_project.keys()

    if not options["metadata_only"]:
        es_client = elasticsearch.Elasticsearch(
            host=settings.ELASTICSEARCH_SERVICE_HOSTNAME, timeout=10000)
        search = elasticsearch_dsl.Search(using=es_client,
                                          index='*,'.join(es_indices) + "*")
        search = search.query("match", mainTranscript_lof='HC')
        search = search.source([
            'contig', 'pos', 'ref', 'alt', '*num_alt', '*gq', '*ab', '*dp',
            '*ad'
        ])

        print('Searching across {} indices...'.format(len(es_indices)))
        result_count_search = search.params(size=0)
        total = result_count_search.execute().hits.total
        print('Loading {} variants...'.format(total))

        with open('lof_variants.csv', 'a') as csvfile:
            sample_fields = ['num_alt', 'gq', 'ab', 'dp', 'ad']
            fieldnames = ['contig', 'pos', 'ref', 'alt', 'index'
                          ] + sample_fields
            writer = csv.DictWriter(csvfile,
                                    fieldnames=fieldnames,
                                    extrasaction='ignore')
            # the file is opened in append mode; the header is only written
            # when no explicit index was requested
            if not options["index"]:
                writer.writeheader()

            # Counted separately so the summary is correct (and defined) even
            # when the scan returns no hits.
            loaded_count = 0
            for i, hit in enumerate(search.scan()):
                result = {key: hit[key] for key in hit}
                result['index'] = hit.meta.index
                for field in sample_fields:
                    # Slice off the exact '_<field>' suffix. str.rstrip treats
                    # its argument as a set of characters and would also eat
                    # trailing characters of the sample id itself.
                    suffix = '_{}'.format(field)
                    result[field] = json.dumps({
                        key[:-len(suffix)]: val
                        for key, val in result.items()
                        if key.endswith(suffix)
                    })
                writer.writerow(result)
                loaded_count = i + 1
                if i % 10000 == 0:
                    print('Parsed {} variants'.format(i))

        print('Loaded {} variants'.format(loaded_count))

    print('Done')
def edit_individuals_handler(request, project_guid):
    """Modify one or more Individual records.

    Args:
        request (object): Django HTTP Request object.
        project_guid (string): GUID of project that contains these individuals.

    Request:
        body should be a json dictionary that contains a 'individuals' list that includes the individuals to update,
        represented by dictionaries of their guid and fields to update -
        for example:
            {
                'individuals': [
                    { 'individualGuid': <individualGuid1>, 'paternalId': <paternalId>, 'affected': 'A' },
                    { 'individualGuid': <individualGuid2>, 'sex': 'U' },
                    ...
                ]
            }

    Response:
        json dictionary with the updated individual(s) and their families like:
            {
                'individualsByGuid': {
                    <individualGuid1> : { individualId: xxx, sex: xxx, affected: xxx, ...},
                    <individualGuid2> : { individualId: xxx, sex: xxx, affected: xxx, ...},
                    ...
                },
                'familiesByGuid': { <familyGuid1>: {...}, ... },
            }
        A 400 response is returned when 'individuals' is missing, when a guid does not match an
        existing individual, when the combined pedigree records fail validation, or when the update raises.
    """

    project = get_project_and_check_permissions(project_guid, request.user, CAN_EDIT)

    request_json = json.loads(request.body)

    modified_individuals_list = request_json.get('individuals')
    if modified_individuals_list is None:
        return create_json_response(
            {}, status=400, reason="'individuals' not specified")

    update_individuals = {ind['individualGuid']: ind for ind in modified_individuals_list}
    update_individual_models = {ind.guid: ind for ind in Individual.objects.filter(guid__in=update_individuals.keys())}

    # Reject guids that don't match an existing individual instead of failing with a KeyError below
    unknown_guids = [guid for guid in update_individuals if guid not in update_individual_models]
    if unknown_guids:
        return create_json_response(
            {'errors': ['Unknown individualGuid(s): {}'.format(json.dumps(unknown_guids))]},
            status=400, reason='Invalid updates')

    # Track the previous id for renamed individuals
    for modified_ind in modified_individuals_list:
        model = update_individual_models[modified_ind['individualGuid']]
        if modified_ind[JsonConstants.INDIVIDUAL_ID_COLUMN] != model.individual_id:
            modified_ind[JsonConstants.PREVIOUS_INDIVIDUAL_ID_COLUMN] = model.individual_id

    # Validate the updates together with the unmodified members of every affected family
    # (both the families the individuals currently belong to and the ones named in the request)
    modified_family_ids = {ind.get('familyId') or ind['family']['familyId'] for ind in modified_individuals_list}
    modified_family_ids.update({ind.family.family_id for ind in update_individual_models.values()})
    related_individuals = Individual.objects.filter(
        family__family_id__in=modified_family_ids, family__project=project).exclude(guid__in=update_individuals.keys())
    related_individuals_json = _get_json_for_individuals(related_individuals, project_guid=project_guid, family_fields=['family_id'])
    individuals_list = modified_individuals_list + related_individuals_json

    # TODO more validation?
    errors, warnings = validate_fam_file_records(individuals_list, fail_on_warnings=True)
    if errors:
        return create_json_response({'errors': errors, 'warnings': warnings}, status=400, reason='Invalid updates')

    try:
        updated_families, updated_individuals = add_or_update_individuals_and_families(
            project, modified_individuals_list, user=request.user
        )
    except Exception as e:
        # Exceptions have no `.message` attribute on Python 3; fall back to str(e)
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]}, status=400, reason='Invalid updates')

    individuals_by_guid = {
        individual.guid: _get_json_for_individual(individual, request.user) for individual in updated_individuals
    }
    families_by_guid = {
        family.guid: _get_json_for_family(family, request.user, add_individual_guids_field=True)
        for family in updated_families
    }

    return create_json_response({
        'individualsByGuid': individuals_by_guid,
        'familiesByGuid': families_by_guid,
    })
def anvil_export(request, project_guid):
    """Return one export row per individual, annotated with the family's saved variants.

    Args:
        request (object): Django HTTP Request object.
        project_guid (string): GUID of the project to export, or 'all' to export
            every project in a category named 'anvil' (case-insensitive).

    Returns:
        json response of the form {'anvilRows': [...]}, where each row is the
        individual's json plus 'Project_ID', 'numSavedVariants' and, for every
        saved variant the individual carries, '<Field>-<n>' columns
        (Zygosity, Chrom, Pos, Ref, Alt, hgvsc, hgvsp, Transcript, geneId, Gene).
    """
    if project_guid == 'all':
        project_guid = None

    if project_guid:
        projects_by_guid = {project_guid: Project.objects.get(guid=project_guid)}
    else:
        projects_by_guid = {p.guid: p for p in Project.objects.filter(projectcategory__name__iexact='anvil')}

    families = _get_over_year_loaded_project_families(projects_by_guid.values())
    prefetch_related_objects(families, 'individual_set')

    saved_variants_by_family = _get_saved_variants_by_family(projects_by_guid.values(), request.user)

    # Handle compound het genes
    compound_het_gene_id_by_family = {}
    for family_guid, saved_variants in saved_variants_by_family.items():
        if len(saved_variants) > 1:
            # variants where no individual has 2 or more alt alleles
            potential_compound_het_variants = [
                variant for variant in saved_variants if all(gen['numAlt'] < 2 for gen in variant['genotypes'].values())
            ]
            main_gene_ids = {variant['mainTranscript']['geneId'] for variant in potential_compound_het_variants}
            if len(main_gene_ids) > 1:
                # This occurs in compound hets where some hits have a primary transcripts in different genes
                for gene_id in main_gene_ids:
                    # use a gene that has a transcript in every candidate variant
                    if all(gene_id in variant['transcripts'] for variant in potential_compound_het_variants):
                        compound_het_gene_id_by_family[family_guid] = gene_id

    individuals = set()
    for family in families:
        individuals.update(family.individual_set.all())
    rows = _get_json_for_individuals(list(individuals), project_guid=project_guid, family_fields=['family_id', 'coded_phenotype'])

    gene_ids = set()
    for row in rows:
        row['Project_ID'] = projects_by_guid[row['projectGuid']].name

        saved_variants = saved_variants_by_family[row['familyGuid']]
        row['numSavedVariants'] = len(saved_variants)
        for i, variant in enumerate(saved_variants):
            genotype = variant['genotypes'].get(row['individualGuid'], {})
            # only report variants this individual actually carries
            if genotype.get('numAlt', -1) > 0:
                gene_id = compound_het_gene_id_by_family.get(row['familyGuid']) or variant['mainTranscript']['geneId']
                gene_ids.add(gene_id)
                variant_fields = {
                    'Zygosity': 'heterozygous' if genotype['numAlt'] == 1 else 'homozygous',
                    'Chrom': variant['chrom'],
                    'Pos': variant['pos'],
                    'Ref': variant['ref'],
                    'Alt': variant['alt'],
                    'hgvsc': variant['mainTranscript']['hgvsc'],
                    'hgvsp': variant['mainTranscript']['hgvsp'],
                    'Transcript': variant['mainTranscript']['transcriptId'],
                    'geneId': gene_id,
                }
                # column suffix is the 1-based position of the variant in the family's saved variants
                row.update({'{}-{}'.format(k, i + 1): v for k, v in variant_fields.items()})

    genes_by_id = get_genes(gene_ids)
    for row in rows:
        # Iterate over a snapshot: 'Gene-<n>' keys are added to the row inside the loop, and
        # changing a dict's size while iterating its live items view raises RuntimeError on Python 3.
        for key, gene_id in list(row.items()):
            if key.startswith('geneId') and genes_by_id.get(gene_id):
                row[key.replace('geneId', 'Gene')] = genes_by_id[gene_id]['geneSymbol']

    return create_json_response({'anvilRows': rows})
def _get_projects_details(projects, user, project_category_guid=None):
    """Assemble the json for the given projects and their related models.

    Args:
        projects: Project models; check_permissions is called for each with the user.
        user: user passed to the permission check and to the json helpers.
        project_category_guid (string): optional guid of a ProjectCategory whose
            json is added to the response.

    Returns:
        dict: 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
            'locusListsByGuid' and 'analysisGroupsByGuid' maps, plus
            'projectCategoriesByGuid' when project_category_guid is given.
    """
    for project in projects:
        check_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    models_by_guid = {}
    for project in projects:
        models_by_guid[project.guid] = project
    projects_json = get_json_for_projects(projects, user)

    all_locus_lists = set()

    functional_tag_types_json = get_json_for_variant_functional_data_tag_types()
    tag_type_queryset = VariantTagType.objects.filter(
        Q(project__in=projects) | Q(project__isnull=True)).prefetch_related('project')
    tag_type_models_by_guid = {tag_type.guid: tag_type for tag_type in tag_type_queryset}
    tag_types_json = _get_json_for_models(tag_type_models_by_guid.values())

    def _applies_to_project(tag_type_json, guid):
        # tag types with no project apply everywhere; the rest only to their own project
        owner = tag_type_models_by_guid[tag_type_json['variantTagTypeGuid']].project
        return owner is None or owner.guid == guid

    for project_json in projects_json:
        guid = project_json['projectGuid']
        project_locus_lists = get_project_locus_list_models(models_by_guid[guid])
        all_locus_lists.update(project_locus_lists)

        project_extras = {
            'locusListGuids': [locus_list.guid for locus_list in project_locus_lists],
            'variantTagTypes': [
                tag_type_json for tag_type_json in tag_types_json
                if _applies_to_project(tag_type_json, project_json['projectGuid'])
            ],
            'variantFunctionalTagTypes': functional_tag_types_json,
        }
        project_json.update(project_extras)

    families_json = _get_json_for_families(Family.objects.filter(project__in=projects), user)
    individuals_json = _get_json_for_individuals(
        Individual.objects.filter(family__project__in=projects), user=user)
    samples_json = get_json_for_samples(
        Sample.objects.filter(individual__family__project__in=projects))
    analysis_groups_json = get_json_for_analysis_groups(
        AnalysisGroup.objects.filter(project__in=projects))

    # attach individual guids to their families
    family_to_individual_guids = defaultdict(list)
    for individual_json in individuals_json:
        family_to_individual_guids[individual_json['familyGuid']].append(individual_json['individualGuid'])
    for family_json in families_json:
        family_json['individualGuids'] = family_to_individual_guids[family_json['familyGuid']]

    # attach sample guids to their individuals
    individual_to_sample_guids = defaultdict(list)
    for sample_json in samples_json:
        individual_to_sample_guids[sample_json['individualGuid']].append(sample_json['sampleGuid'])
    for individual_json in individuals_json:
        individual_json['sampleGuids'] = individual_to_sample_guids[individual_json['individualGuid']]

    locus_lists_json = get_json_for_locus_lists(list(all_locus_lists), user)

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families_json},
        'individualsByGuid': {i['individualGuid']: i for i in individuals_json},
        'samplesByGuid': {s['sampleGuid']: s for s in samples_json},
        'locusListsByGuid': {ll['locusListGuid']: ll for ll in locus_lists_json},
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups_json},
    }
    if project_category_guid:
        category = ProjectCategory.objects.get(guid=project_category_guid)
        response['projectCategoriesByGuid'] = {project_category_guid: category.json()}
    return response
def handle(self, *args, **options):
    """Write project/individual metadata and matching variants to CSV files.

    The elasticsearch indices to search are taken from, in order of precedence:
    options["index"], the 'index' column of an existing project_indices.csv
    (when options["use_project_indices_csv"] is set), or the indices of all
    genome version 37 BaseProjects not matched by EXCLUDE_PROJECTS. In the last
    case project_indices.csv and seqr_individuals.csv are (re)written.

    Unless options["metadata_only"] is set, variants whose mainTranscript_lof
    is 'HC' are appended to lof_variants.csv, with the per-sample fields
    collapsed into one JSON object per field keyed by the sample prefix.
    """
    if options["index"]:
        es_indices = options["index"]
    elif options["use_project_indices_csv"]:
        with open('project_indices.csv') as csvfile:
            reader = csv.DictReader(csvfile)
            es_indices = {row['index'] for row in reader}
    else:
        projects_q = BaseProject.objects.filter(genome_version='37')
        for exclude_project in EXCLUDE_PROJECTS:
            projects_q = projects_q.exclude(project_name__icontains=exclude_project)
        indices_for_project = defaultdict(list)
        for project in projects_q:
            indices_for_project[project.get_elasticsearch_index()].append(project)
        # projects without an index are grouped under None; drop them
        indices_for_project.pop(None, None)

        seqr_projects = []
        # NOTE(review): binary mode suits the Python 2 csv module; Python 3 would
        # need text mode with newline='' - confirm the target interpreter.
        with open('project_indices.csv', 'wb') as csvfile:
            fieldnames = ['projectGuid', 'index']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for index, projects in indices_for_project.items():
                for project in projects:
                    seqr_projects.append(project.seqr_project)
                    writer.writerow({'projectGuid': project.seqr_project.guid, 'index': index})

        individuals = _get_json_for_individuals(Individual.objects.filter(family__project__in=seqr_projects))
        with open('seqr_individuals.csv', 'wb') as csvfile:
            fieldnames = ['projectGuid', 'familyGuid', 'individualId', 'paternalId', 'maternalId', 'sex', 'affected']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
            writer.writeheader()
            for individual in individuals:
                writer.writerow(individual)
        es_indices = indices_for_project.keys()

    if not options["metadata_only"]:
        es_client = elasticsearch.Elasticsearch(host=settings.ELASTICSEARCH_SERVICE_HOSTNAME, timeout=10000)
        search = elasticsearch_dsl.Search(using=es_client, index='*,'.join(es_indices) + "*")
        search = search.query("match", mainTranscript_lof='HC')
        search = search.source(['contig', 'pos', 'ref', 'alt', '*num_alt', '*gq', '*ab', '*dp', '*ad'])

        print('Searching across {} indices...'.format(len(es_indices)))
        result_count_search = search.params(size=0)
        total = result_count_search.execute().hits.total
        print('Loading {} variants...'.format(total))

        with open('lof_variants.csv', 'a') as csvfile:
            sample_fields = ['num_alt', 'gq', 'ab', 'dp', 'ad']
            fieldnames = ['contig', 'pos', 'ref', 'alt', 'index'] + sample_fields
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
            # the file is opened in append mode; the header is only written
            # when no explicit index was requested
            if not options["index"]:
                writer.writeheader()

            # Counted separately so the summary is correct (and defined) even
            # when the scan returns no hits.
            loaded_count = 0
            for i, hit in enumerate(search.scan()):
                result = {key: hit[key] for key in hit}
                result['index'] = hit.meta.index
                for field in sample_fields:
                    # Slice off the exact '_<field>' suffix. str.rstrip treats
                    # its argument as a set of characters and would also eat
                    # trailing characters of the sample id itself.
                    suffix = '_{}'.format(field)
                    result[field] = json.dumps({
                        key[:-len(suffix)]: val for key, val in result.items() if key.endswith(suffix)
                    })
                writer.writerow(result)
                loaded_count = i + 1
                if i % 10000 == 0:
                    print('Parsed {} variants'.format(i))

        print('Loaded {} variants'.format(loaded_count))

    print('Done')