def _get_samples_json(matched_sample_id_to_sample_record, project_guid):
    """Build the JSON response describing a set of updated samples.

    Args:
        matched_sample_id_to_sample_record (dict): mapping whose values are the sample
            records to serialize (the keys are not used here).
        project_guid (string): forwarded to get_json_for_samples.

    Returns:
        dict: always contains 'samplesByGuid' (sample json keyed by 'sampleGuid'). When at
            least one sample was serialized it also contains 'individualsByGuid', mapping each
            referenced individual guid to {'sampleGuids': [...]} listing all of that
            individual's sample guids.
    """
    updated_sample_json = get_json_for_samples(
        matched_sample_id_to_sample_record.values(), project_guid=project_guid)

    response = {
        'samplesByGuid': {s['sampleGuid']: s for s in updated_sample_json},
    }

    updated_individuals = {s['individualGuid'] for s in updated_sample_json}
    if updated_individuals:
        # Only 'sample_set' is read below, so nothing else is prefetched.
        individuals = Individual.objects.filter(
            guid__in=updated_individuals).prefetch_related('sample_set').only('guid')
        response['individualsByGuid'] = {
            # Plain .all() is answered from the prefetch cache; chaining another queryset
            # method (e.g. .only()) onto the related manager would issue one extra query per
            # individual and ignore the prefetched rows.
            ind.guid: {'sampleGuids': [s.guid for s in ind.sample_set.all()]}
            for ind in individuals
        }

    return response
def _get_samples_json(matched_sample_id_to_sample_record, inactivate_sample_guids, project_guid):
    """Assemble the response payload for updated and inactivated samples.

    Args:
        matched_sample_id_to_sample_record (dict): mapping whose values are the sample
            records to serialize (the keys are not used here).
        inactivate_sample_guids (iterable): guids reported back as {'isActive': False}.
        project_guid (string): forwarded to get_json_for_samples.

    Returns:
        dict: 'samplesByGuid' holds an {'isActive': False} stub for every inactivated guid,
            overridden by the full sample json for any guid that was also updated. When any
            sample was serialized, 'individualsByGuid' maps each referenced individual guid
            to {'sampleGuids': [...]}.
    """
    sample_json_list = get_json_for_samples(
        matched_sample_id_to_sample_record.values(), project_guid=project_guid)

    # Inactivated stubs go in first so that a freshly serialized sample wins on a guid clash.
    samples_by_guid = {}
    for inactive_guid in inactivate_sample_guids:
        samples_by_guid[inactive_guid] = {'isActive': False}
    serialized_by_guid = {}
    for sample_json in sample_json_list:
        serialized_by_guid[sample_json['sampleGuid']] = sample_json
    samples_by_guid.update(serialized_by_guid)

    response = {'samplesByGuid': samples_by_guid}

    individual_guids = set()
    for sample_json in sample_json_list:
        individual_guids.add(sample_json['individualGuid'])

    if not individual_guids:
        return response

    individual_models = Individual.objects.filter(
        guid__in=individual_guids).prefetch_related('sample_set')
    individuals_by_guid = {}
    for individual in individual_models:
        individuals_by_guid[individual.guid] = {
            'sampleGuids': [sample.guid for sample in individual.sample_set.all()],
        }
    response['individualsByGuid'] = individuals_by_guid

    return response
def _retrieve_samples(project_guid, individuals_by_guid, sample_models, sample_guid_key='sampleGuids'):
    """Serialize the given samples and register each one on its individual.

    Args:
        project_guid (string): forwarded to get_json_for_samples.
        individuals_by_guid (dict): maps each individual guid to a dictionary with individual
            info. The value stored under `sample_guid_key` must support `.add()`; every
            sample's guid is added to the entry of the individual it belongs to.
        sample_models: the sample records to serialize.
        sample_guid_key (string): key in each individual dictionary that collects sample guids.

    Returns:
        dict: sample json keyed by sample guid.
    """
    samples_by_guid = {}
    for sample_json in get_json_for_samples(sample_models, project_guid=project_guid):
        guid = sample_json['sampleGuid']
        samples_by_guid[guid] = sample_json

        owner = individuals_by_guid[sample_json['individualGuid']]
        owner[sample_guid_key].add(guid)

    return samples_by_guid
def add_dataset_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'ignoreMissingFamilyMembers': <optional, forwarded to add_dataset>
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
            'mappingFilePath': <optional, forwarded to add_dataset>
        }

        Response body - will contain the following structure:
        {
            'samplesByGuid': { <sampleGuid>: <sample json>, ... },
            'individualsByGuid': { <individualGuid>: { 'sampleGuids': [...] }, ... }
        }
        'individualsByGuid' is only present when at least one returned sample has a
        'sampleId' contained in the created sample ids. If anything raises while the dataset
        is being added, the response is { 'errors': [<message>] } with status 400.

    Raises:
        ValueError: if 'sampleType' or 'datasetType' is missing from the request body (this
            check runs before the try block, so it is not converted into a 400 response).
    """
    logger.info("add_dataset_handler: " + str(request))

    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    logger.info("add_dataset_handler: received %s" % pformat(request_json))

    required_fields = ['sampleType', 'datasetType']
    if any(field not in request_json for field in required_fields):
        raise ValueError("request must contain fields: {}".format(', '.join(required_fields)))

    def _stripped(field):
        # Strip surrounding whitespace from a non-empty value; pass None/'' through untouched.
        value = request_json.get(field)
        return value.strip() if value else value

    sample_type = request_json['sampleType']
    dataset_type = request_json['datasetType']
    elasticsearch_index = _stripped('elasticsearchIndex')
    dataset_path = _stripped('datasetPath')
    dataset_name = _stripped('datasetName')

    ignore_extra_samples_in_callset = request_json.get('ignoreExtraSamplesInCallset')
    ignore_missing_family_members = request_json.get('ignoreMissingFamilyMembers')
    # `or {}` also covers a body that sends "mappingFile": null explicitly.
    mapping_file_id = (request_json.get('mappingFile') or {}).get('uploadedFileId')
    mapping_file_path = request_json.get('mappingFilePath')

    try:
        updated_samples, created_sample_ids = add_dataset(
            project=project,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            dataset_path=dataset_path,
            dataset_name=dataset_name,
            max_edit_distance=0,
            ignore_extra_samples_in_callset=ignore_extra_samples_in_callset,
            ignore_missing_family_members=ignore_missing_family_members,
            mapping_file_path=mapping_file_path,
            mapping_file_id=mapping_file_id,
        )

        # update VCFFile records
        if updated_samples:
            if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS:
                base_project = BaseProject.objects.get(seqr_project=project)
                get_datastore(base_project).bust_project_cache(base_project.project_id)
                clear_project_results_cache(base_project.project_id)

                # Reuse the most recently created matching VCFFile, if there is one.
                vcf_file = VCFFile.objects.filter(
                    project=base_project,
                    dataset_type=dataset_type,
                    sample_type=sample_type,
                    elasticsearch_index=elasticsearch_index).order_by('-pk').first()

                if not vcf_file:
                    vcf_file = VCFFile.objects.create(
                        project=base_project,
                        dataset_type=dataset_type,
                        sample_type=sample_type,
                        elasticsearch_index=elasticsearch_index,
                    )
                    logger.info("Created vcf file: " + str(vcf_file.__dict__))

                # legacy VCFFile model requires non-empty vcf path
                vcf_file.file_path = dataset_path or "{}.vcf.gz".format(elasticsearch_index)
                # The loaded date is taken from the first updated sample. The next() builtin
                # works on both Python 2 and 3, unlike the iterator's .next() method.
                vcf_file.loaded_date = next(iter(updated_samples)).loaded_date
                vcf_file.save()

                for sample in updated_samples:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=sample.individual).only('id'):
                        base_indiv.vcf_files.add(vcf_file)

            elif dataset_type == Sample.DATASET_TYPE_READ_ALIGNMENTS:
                for sample in updated_samples:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=sample.individual).only('id'):
                        base_indiv.bam_file_path = sample.dataset_file_path
                        base_indiv.save()

        updated_sample_json = get_json_for_samples(updated_samples, project_guid=project_guid)
        response = {
            'samplesByGuid': {s['sampleGuid']: s for s in updated_sample_json},
        }

        # Only individuals that received a newly created sample need their sample list resent.
        updated_individuals = {
            s['individualGuid'] for s in updated_sample_json if s['sampleId'] in created_sample_ids
        }
        if updated_individuals:
            individuals = Individual.objects.filter(
                guid__in=updated_individuals).prefetch_related('sample_set', 'family').only('guid')
            response['individualsByGuid'] = {
                # Plain .all() reads the prefetch cache; chaining .only() onto the related
                # manager would run a separate query per individual.
                ind.guid: {'sampleGuids': [s.guid for s in ind.sample_set.all()]}
                for ind in individuals
            }

            # Data has arrived, so families still waiting for data move to "in progress".
            for ind in individuals:
                family = ind.family
                if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA:
                    update_seqr_model(
                        family, analysis_status=Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

        return create_json_response(response)
    except Exception as e:
        traceback.print_exc()
        # Exceptions only carry a `.message` attribute on Python 2; fall back to str(e) so the
        # handler itself cannot fail with AttributeError on Python 3.
        return create_json_response({'errors': [getattr(e, 'message', None) or str(e)]}, status=400)
def _get_projects_details(projects, user, project_category_guid=None):
    """Collect the JSON for the given projects and everything nested under them.

    Args:
        projects: project models to describe; permissions are checked for each one.
        user: the requesting user, forwarded to the permission check and the serializers.
        project_category_guid (string): optional; when given, the matching ProjectCategory's
            json is added to the response.

    Returns:
        dict: 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
            'locusListsByGuid' and 'analysisGroupsByGuid' (each keyed by the entity's guid),
            plus 'projectCategoriesByGuid' when project_category_guid is given.
    """
    def _group_guids(records, group_field, guid_field):
        # Map each distinct record[group_field] to the list of record[guid_field] values.
        grouped = defaultdict(list)
        for record in records:
            grouped[record[group_field]].append(record[guid_field])
        return grouped

    for project_model in projects:
        check_permissions(project_model, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {p.guid: p for p in projects}
    projects_json = get_json_for_projects(projects, user)

    all_locus_lists = set()

    functional_tag_types = get_json_for_variant_functional_data_tag_types()
    tag_type_models = VariantTagType.objects.filter(
        Q(project__in=projects) | Q(project__isnull=True)).prefetch_related('project')
    tag_type_models_by_guid = {}
    for tag_type_model in tag_type_models:
        tag_type_models_by_guid[tag_type_model.guid] = tag_type_model
    tag_types_json = _get_json_for_models(tag_type_models_by_guid.values())

    for project_json in projects_json:
        current_guid = project_json['projectGuid']
        project_locus_lists = get_project_locus_list_models(project_models_by_guid[current_guid])
        all_locus_lists.update(project_locus_lists)

        locus_list_guids = [locus_list.guid for locus_list in project_locus_lists]

        # A tag type applies to this project when it is global (no project) or owned by it.
        project_tag_types = []
        for tag_json in tag_types_json:
            owner = tag_type_models_by_guid[tag_json['variantTagTypeGuid']].project
            if owner is None or owner.guid == current_guid:
                project_tag_types.append(tag_json)

        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': project_tag_types,
            'variantFunctionalTagTypes': functional_tag_types,
        })

    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(family_models, user)
    individual_models = Individual.objects.filter(family__project__in=projects)
    individuals = _get_json_for_individuals(individual_models, user=user)
    sample_models = Sample.objects.filter(individual__family__project__in=projects)
    samples = get_json_for_samples(sample_models)
    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups = get_json_for_analysis_groups(analysis_group_models)

    # Link parents to children: families -> individuals, individuals -> samples.
    individual_guids_by_family = _group_guids(individuals, 'familyGuid', 'individualGuid')
    for family_json in families:
        family_json['individualGuids'] = individual_guids_by_family[family_json['familyGuid']]

    sample_guids_by_individual = _group_guids(samples, 'individualGuid', 'sampleGuid')
    for individual_json in individuals:
        individual_json['sampleGuids'] = sample_guids_by_individual[individual_json['individualGuid']]

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'locusListsByGuid': {
            ll['locusListGuid']: ll
            for ll in get_json_for_locus_lists(list(all_locus_lists), user)
        },
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }

    if project_category_guid:
        category = ProjectCategory.objects.get(guid=project_category_guid)
        response['projectCategoriesByGuid'] = {project_category_guid: category.json()}

    return response
def _get_projects_details(projects, user, project_category_guid=None):
    """Collect the JSON for the given projects and everything nested under them.

    All serializers are called with skip_nested=True. When exactly one project is passed its
    guid is handed to the serializers as project_guid. Otherwise project_guid is None and the
    'projectId' / 'familyId' / 'individualId' fields of the serialized records are popped and
    replaced here with the corresponding 'projectGuid' / 'familyGuid' / 'individualGuid'.

    Args:
        projects: project models to describe; must support len() and indexing. Permissions
            are checked for each one.
        user: the requesting user, forwarded to the permission check and the serializers.
        project_category_guid (string): optional; when given, the matching ProjectCategory's
            json is added to the response.

    Returns:
        dict: 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
            'igvSamplesByGuid', 'locusListsByGuid' and 'analysisGroupsByGuid' (each keyed by
            the entity's guid), plus 'projectCategoriesByGuid' when project_category_guid is
            given.
    """
    for project in projects:
        check_project_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {project.guid: project for project in projects}
    projects_json = get_json_for_projects(projects, user)

    # Filtering across the many-to-many `projects` relation yields one row per matching
    # project, so a locus list shared by several of the requested projects would come back
    # more than once (and be listed repeatedly in 'locusListGuids') without distinct().
    locus_lists = LocusList.objects.filter(
        projects__in=projects).distinct().prefetch_related('projects')

    project_guid = projects[0].guid if len(projects) == 1 else None

    functional_data_tag_types = get_json_for_variant_functional_data_tag_types()
    variant_tag_types_by_guid = {
        vtt.guid: vtt
        for vtt in VariantTagType.objects.filter(
            Q(project__in=projects) | Q(project__isnull=True)).prefetch_related('project')
    }
    variant_tag_types = _get_json_for_models(list(variant_tag_types_by_guid.values()))

    for project_json in projects_json:
        project = project_models_by_guid[project_json['projectGuid']]
        project_json.update({
            'locusListGuids': [
                locus_list.guid for locus_list in locus_lists
                if project in locus_list.projects.all()
            ],
            # A tag type applies when it is global (no project) or owned by this project.
            'variantTagTypes': [
                vtt for vtt in variant_tag_types
                if variant_tag_types_by_guid[vtt['variantTagTypeGuid']].project is None
                or variant_tag_types_by_guid[vtt['variantTagTypeGuid']].project.guid
                == project_json['projectGuid']
            ],
            'variantFunctionalTagTypes': functional_data_tag_types,
        })

    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(
        family_models, user, project_guid=project_guid, skip_nested=True)

    individual_models = Individual.objects.filter(family__in=family_models)
    individuals = _get_json_for_individuals(
        individual_models, user=user, project_guid=project_guid, add_hpo_details=True,
        skip_nested=True)

    sample_models = Sample.objects.filter(individual__in=individual_models)
    samples = get_json_for_samples(sample_models, project_guid=project_guid, skip_nested=True)

    igv_sample_models = IgvSample.objects.filter(individual__in=individual_models)
    igv_samples = get_json_for_samples(
        igv_sample_models, project_guid=project_guid, skip_nested=True)

    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups = get_json_for_analysis_groups(
        analysis_group_models, project_guid=project_guid, skip_nested=True)

    if not project_guid:
        # No single project guid was available to the serializers, so translate the database
        # ids they emitted into guids, walking down the project -> family -> individual chain.
        project_id_to_guid = {project.id: project.guid for project in projects}
        family_id_to_guid = {family.id: family.guid for family in family_models}
        individual_id_to_guid = {
            individual.id: individual.guid for individual in individual_models
        }

        family_guid_to_project_guid = {}
        individual_guid_to_project_guid = {}

        for family in families:
            family_project_guid = project_id_to_guid[family.pop('projectId')]
            family['projectGuid'] = family_project_guid
            family_guid_to_project_guid[family['familyGuid']] = family_project_guid

        for individual in individuals:
            family_guid = family_id_to_guid[individual.pop('familyId')]
            individual_project_guid = family_guid_to_project_guid[family_guid]
            individual['familyGuid'] = family_guid
            individual['projectGuid'] = individual_project_guid
            individual_guid_to_project_guid[individual['individualGuid']] = individual_project_guid

        # Samples and igv samples are translated identically.
        for sample_list in (samples, igv_samples):
            for sample in sample_list:
                individual_guid = individual_id_to_guid[sample.pop('individualId')]
                sample['individualGuid'] = individual_guid
                sample['projectGuid'] = individual_guid_to_project_guid[individual_guid]

        for group in analysis_groups:
            group['projectGuid'] = project_id_to_guid[group.pop('projectId')]

    # Link parents to children: families -> individuals, individuals -> (igv) samples.
    individual_guids_by_family = defaultdict(list)
    for individual in individuals:
        individual_guids_by_family[individual['familyGuid']].append(individual['individualGuid'])
    for family in families:
        family['individualGuids'] = individual_guids_by_family[family['familyGuid']]

    sample_guids_by_individual = defaultdict(list)
    for sample in samples:
        sample_guids_by_individual[sample['individualGuid']].append(sample['sampleGuid'])

    igv_sample_guids_by_individual = defaultdict(list)
    for sample in igv_samples:
        igv_sample_guids_by_individual[sample['individualGuid']].append(sample['sampleGuid'])

    for individual in individuals:
        individual['sampleGuids'] = sample_guids_by_individual[individual['individualGuid']]
        individual['igvSampleGuids'] = igv_sample_guids_by_individual[individual['individualGuid']]

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'igvSamplesByGuid': {s['sampleGuid']: s for s in igv_samples},
        'locusListsByGuid': {
            ll['locusListGuid']: ll for ll in get_json_for_locus_lists(locus_lists, user)
        },
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }

    if project_category_guid:
        response['projectCategoriesByGuid'] = {
            project_category_guid: ProjectCategory.objects.get(guid=project_category_guid).json()
        }

    return response
def _get_projects_details(projects, user, project_category_guid=None):
    """Collect the JSON for the given projects and everything nested under them.

    Args:
        projects: project models to describe; permissions are checked for each one.
        user: the requesting user, forwarded to the permission check and the serializers.
        project_category_guid (string): optional; when given, the matching ProjectCategory's
            json is added to the response.

    Returns:
        dict: 'projectsByGuid', 'familiesByGuid', 'individualsByGuid', 'samplesByGuid',
            'locusListsByGuid' and 'analysisGroupsByGuid' (each keyed by the entity's guid),
            plus 'projectCategoriesByGuid' when project_category_guid is given.
    """
    for project_model in projects:
        check_permissions(project_model, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {p.guid: p for p in projects}
    projects_json = get_json_for_projects(projects, user)

    all_locus_lists = set()
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    for project_json in projects_json:
        project_model = project_models_by_guid[project_json['projectGuid']]
        project_locus_lists = get_project_locus_list_models(project_model)
        all_locus_lists.update(project_locus_lists)

        locus_list_guids = [locus_list.guid for locus_list in project_locus_lists]
        project_tag_types = get_project_variant_tag_types(project_model)
        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': project_tag_types,
            'variantFunctionalTagTypes': functional_tag_types,
        })

    # The serializers attach the child guid lists themselves via the add_*_guids_field flags.
    family_models = Family.objects.filter(project__in=projects)
    families = _get_json_for_families(family_models, user, add_individual_guids_field=True)

    individual_models = Individual.objects.filter(family__project__in=projects)
    individuals = _get_json_for_individuals(
        individual_models, user=user, add_sample_guids_field=True)

    sample_models = Sample.objects.filter(individual__family__project__in=projects)
    samples = get_json_for_samples(sample_models)

    analysis_group_models = AnalysisGroup.objects.filter(project__in=projects)
    analysis_groups = get_json_for_analysis_groups(analysis_group_models)

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'locusListsByGuid': {
            ll['locusListGuid']: ll
            for ll in get_json_for_locus_lists(list(all_locus_lists), user)
        },
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }

    if project_category_guid:
        category = ProjectCategory.objects.get(guid=project_category_guid)
        response['projectCategoriesByGuid'] = {project_category_guid: category.json()}

    return response
def add_dataset_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
        }

        Response body - will contain the following structure:
        {
            'samplesByGuid': { <sampleGuid>: <sample json>, ... },
            'individualsByGuid': { <individualGuid>: { 'sampleGuids': [...] }, ... }
        }
        'individualsByGuid' is only present when at least one returned sample has a
        'sampleId' contained in the created sample ids. If anything raises while the dataset
        is being added, the response is { 'errors': [<message>] } with status 400.

    Raises:
        ValueError: if 'sampleType' or 'datasetType' is missing from the request body (this
            check runs before the try block, so it is not converted into a 400 response).
    """
    logger.info("add_dataset_handler: " + str(request))

    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    logger.info("add_dataset_handler: received %s" % pformat(request_json))

    required_fields = ['sampleType', 'datasetType']
    if any(field not in request_json for field in required_fields):
        raise ValueError("request must contain fields: {}".format(', '.join(required_fields)))

    sample_type = request_json['sampleType']
    dataset_type = request_json['datasetType']
    elasticsearch_index = request_json.get('elasticsearchIndex')
    dataset_path = request_json.get('datasetPath')
    dataset_name = request_json.get('datasetName')

    ignore_extra_samples_in_callset = request_json.get('ignoreExtraSamplesInCallset')
    # `or {}` also covers a body that sends "mappingFile": null explicitly.
    mapping_file_id = (request_json.get('mappingFile') or {}).get('uploadedFileId')

    try:
        updated_samples, created_sample_ids = add_dataset(
            project=project,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            dataset_path=dataset_path,
            dataset_name=dataset_name,
            max_edit_distance=0,
            ignore_extra_samples_in_callset=ignore_extra_samples_in_callset,
            mapping_file_id=mapping_file_id,
        )

        updated_sample_json = get_json_for_samples(updated_samples, project_guid=project_guid)
        response = {
            'samplesByGuid': {s['sampleGuid']: s for s in updated_sample_json},
        }

        # Only individuals that received a newly created sample need their sample list resent.
        updated_individuals = {
            s['individualGuid'] for s in updated_sample_json if s['sampleId'] in created_sample_ids
        }
        if updated_individuals:
            individuals = Individual.objects.filter(
                guid__in=updated_individuals).prefetch_related('sample_set').only('guid')
            response['individualsByGuid'] = {
                # Plain .all() reads the prefetch cache; chaining .only() onto the related
                # manager would run a separate query per individual.
                ind.guid: {'sampleGuids': [s.guid for s in ind.sample_set.all()]}
                for ind in individuals
            }

        return create_json_response(response)
    except Exception as e:
        # Exceptions only carry a `.message` attribute on Python 2; fall back to str(e) so the
        # handler itself cannot fail with AttributeError on Python 3.
        return create_json_response({'errors': [getattr(e, 'message', None) or str(e)]}, status=400)