def add_variants_dataset_handler(request, project_guid):
    """Create or update Sample records for a variant callset loaded into an elasticsearch index.

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'datasetType': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFilePath': <String>
        }

        Response body - json with 'samplesByGuid' and 'familiesByGuid' for the updated records
    """
    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)
    request_json = json.loads(request.body)

    try:
        # Validate the request payload before touching elasticsearch.
        required_fields = ['elasticsearchIndex', 'datasetType']
        if not all(field in request_json for field in required_fields):
            raise ValueError('request must contain fields: {}'.format(', '.join(required_fields)))

        es_index = request_json['elasticsearchIndex'].strip()
        dataset_type = request_json['datasetType']
        if dataset_type not in Sample.DATASET_TYPE_LOOKUP:
            raise ValueError('Invalid dataset type "{}"'.format(dataset_type))

        sample_ids, index_metadata = get_elasticsearch_index_samples(es_index, dataset_type=dataset_type)
        if not sample_ids:
            raise ValueError('No samples found in the index. Make sure the specified caller type is correct')
        validate_index_metadata(index_metadata, project, es_index, dataset_type=dataset_type)
        sample_type = index_metadata['sampleType']

        # Optional file mapping ES sample ids to seqr individual ids.
        mapping_path = request_json.get('mappingFilePath')
        sample_id_to_individual_id_mapping = load_mapping_file(mapping_path) if mapping_path else {}

        loaded_date = timezone.now()
        samples_by_id = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=es_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
            loaded_date=loaded_date,
        )

        unmatched_samples = set(sample_ids) - set(samples_by_id.keys())
        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are allowed, but at least one must have matched.
            if not samples_by_id:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)".format(
                        len(sample_ids)))
        elif unmatched_samples:
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'.format(
                    ", ".join(unmatched_samples)))

        prefetch_related_objects(samples_by_id.values(), 'individual__family')
        included_families = {sample.individual.family for sample in samples_by_id.values()}

        # Individuals in the included families with active data of this type that are absent from the callset.
        missing_individuals = Individual.objects.filter(
            family__in=included_families,
            sample__is_active=True,
            sample__dataset_type=dataset_type,
        ).exclude(sample__in=samples_by_id.values()).select_related('family')

        missing_family_individuals = defaultdict(list)
        for individual in missing_individuals:
            missing_family_individuals[individual.family].append(individual)

        if missing_family_individuals:
            family_summaries = sorted([
                '{} ({})'.format(family.family_id, ', '.join(sorted([i.individual_id for i in indivs])))
                for family, indivs in missing_family_individuals.items()
            ])
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'.format(
                    ', '.join(family_summaries)))

        inactivate_sample_guids = _update_variant_samples(samples_by_id, es_index, loaded_date, dataset_type)

    except Exception as e:
        traceback.print_exc()
        return create_json_response({'errors': [e.message or str(e)]}, status=400)

    if not samples_by_id:
        return create_json_response({'samplesByGuid': {}})

    # Families that were waiting for data now have it - flip them to "analysis in progress".
    family_guids_to_update = [
        family.guid for family in included_families
        if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA
    ]
    Family.objects.filter(guid__in=family_guids_to_update).update(
        analysis_status=Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

    response_json = _get_samples_json(samples_by_id, inactivate_sample_guids, project_guid)
    response_json['familiesByGuid'] = {
        guid: {'analysisStatus': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS}
        for guid in family_guids_to_update
    }
    return create_json_response(response_json)
def handle(self, *args, **options):
    """Transfer a project's variant callset to an hg38 elasticsearch index.

    Validates the new index, matches its samples to the project, lifts over all
    saved variants from hg19 to hg38 (prompting interactively on ambiguous or
    failed cases), and finally updates the project's genome version.

    Args:
        *args: unused positional args from the management command framework
        **options: parsed command options; reads 'project' (name or GUID) and 'es_index'

    Raises:
        CommandError: on unmatched samples, missing family members, missing
            families, or when the operator declines an interactive prompt.
    """
    project_arg = options['project']
    elasticsearch_index = options['es_index']

    project = Project.objects.get(Q(name=project_arg) | Q(guid=project_arg))
    logger.info('Updating project genome version for {}'.format(project.name))

    # Validate the provided index
    logger.info('Validating es index {}'.format(elasticsearch_index))
    sample_ids, index_metadata = get_elasticsearch_index_samples(elasticsearch_index)
    validate_index_metadata(index_metadata, project, elasticsearch_index, genome_version=GENOME_VERSION_GRCh38)
    sample_type = index_metadata['sampleType']
    dataset_path = index_metadata['sourceFilePath']

    matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
        project=project,
        sample_ids=sample_ids,
        sample_type=sample_type,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        elasticsearch_index=elasticsearch_index,
        sample_id_to_individual_id_mapping={},
    )

    unmatched_samples = set(sample_ids) - set(matched_sample_id_to_sample_record.keys())
    if len(unmatched_samples) > 0:
        raise CommandError('Matches not found for ES sample ids: {}.'.format(', '.join(unmatched_samples)))

    prefetch_related_objects(matched_sample_id_to_sample_record.values(), 'individual__family')
    included_families = {sample.individual.family for sample in matched_sample_id_to_sample_record.values()}
    # Individuals with active variant-call data whose families are in the callset but who are not themselves included.
    missing_individuals = Individual.objects.filter(
        family__in=included_families,
        sample__is_active=True,
        sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
    ).exclude(sample__in=matched_sample_id_to_sample_record.values()).select_related('family')
    missing_family_individuals = defaultdict(list)
    for individual in missing_individuals:
        missing_family_individuals[individual.family].append(individual)
    if missing_family_individuals:
        raise CommandError(
            'The following families are included in the callset but are missing some family members: {}.'.format(
                ', '.join([
                    '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in missing_indivs]))
                    for family, missing_indivs in missing_family_individuals.items()
                ])))

    # Get and clean up expected saved variants
    saved_variant_models_by_guid = {v.guid: v for v in SavedVariant.objects.filter(family__project=project)}
    deleted_no_tags = set()
    for guid, variant in saved_variant_models_by_guid.items():
        if not (variant.varianttag_set.count() or variant.variantnote_set.count()):
            deleted_no_tags.add(guid)

    if deleted_no_tags:
        if raw_input('Do you want to delete the following {} saved variants with no tags (y/n)?: {} '.format(
                len(deleted_no_tags), ', '.join(deleted_no_tags))) == 'y':
            for guid in deleted_no_tags:
                saved_variant_models_by_guid.pop(guid).delete()
            logger.info('Deleted {} variants'.format(len(deleted_no_tags)))

    expected_families = {sv.family for sv in saved_variant_models_by_guid.values()}
    missing_families = expected_families - included_families
    if missing_families:
        raise CommandError(
            'The following families have saved variants but are missing from the callset: {}.'.format(
                ', '.join([f.family_id for f in missing_families])))

    # Lift-over saved variants
    _update_variant_samples(matched_sample_id_to_sample_record, elasticsearch_index, dataset_path)
    saved_variants = get_json_for_saved_variants(saved_variant_models_by_guid.values(), add_details=True)
    saved_variants_to_lift = [v for v in saved_variants if v['genomeVersion'] != GENOME_VERSION_GRCh38]

    num_already_lifted = len(saved_variants) - len(saved_variants_to_lift)
    if num_already_lifted:
        if raw_input('Found {} saved variants already on Hg38. Continue with liftover (y/n)? '.format(
                num_already_lifted)) != 'y':
            raise CommandError('Error: found {} saved variants already on Hg38'.format(num_already_lifted))
    logger.info('Lifting over {} variants (skipping {} that are already lifted)'.format(
        len(saved_variants_to_lift), num_already_lifted))

    liftover_to_38 = LiftOver('hg19', 'hg38')
    hg37_to_hg38_xpos = {}
    lift_failed = {}
    for v in saved_variants_to_lift:
        # Each distinct hg37 xpos is converted at most once; failures are remembered too.
        if not (hg37_to_hg38_xpos.get(v['xpos']) or v['xpos'] in lift_failed):
            hg38_coord = liftover_to_38.convert_coordinate('chr{}'.format(v['chrom'].lstrip('chr')), int(v['pos']))
            if hg38_coord and hg38_coord[0]:
                hg37_to_hg38_xpos[v['xpos']] = get_xpos(hg38_coord[0][0], hg38_coord[0][1])
            else:
                lift_failed[v['xpos']] = v

    if lift_failed:
        if raw_input('Unable to lift over the following {} coordinates. Continue with update (y/n)?: {} '.format(
                len(lift_failed), ', '.join([
                    '{}:{}-{}-{} ({})'.format(
                        v['chrom'], v['pos'], v['ref'], v['alt'], ', '.join(v['familyGuids']))
                    for v in lift_failed.values()
                ]))) != 'y':
            raise CommandError('Error: unable to lift over {} variants'.format(len(lift_failed)))

    saved_variants_map = defaultdict(list)
    for v in saved_variants_to_lift:
        if hg37_to_hg38_xpos.get(v['xpos']):
            variant_model = saved_variant_models_by_guid[v['variantGuid']]
            saved_variants_map[(hg37_to_hg38_xpos[v['xpos']], v['ref'], v['alt'])].append(variant_model)

    es_variants = get_es_variants_for_variant_tuples(expected_families, saved_variants_map.keys())

    missing_variants = set(saved_variants_map.keys()) - {(v['xpos'], v['ref'], v['alt']) for v in es_variants}
    if missing_variants:
        missing_variant_strings = []
        for xpos, ref, alt in missing_variants:
            var_id = '{}-{}-{}'.format(xpos, ref, alt)
            for v in saved_variants_map[(xpos, ref, alt)]:
                tags = v.varianttag_set.all()
                notes = v.variantnote_set.all()
                missing_variant_strings.append('{var_id} {family_id}: {tags} ({guid})'.format(
                    var_id=var_id,
                    family_id=v.family.family_id,
                    guid=v.guid,
                    tags=', '.join([tag.variant_tag_type.name for tag in tags]) if tags else 'No Tags; {}'.format(
                        '; '.join([note.note for note in notes]))))
        if raw_input('Unable to find the following {} variants in the index. Continue with update (y/n)?:\n{}\n'.format(
                len(missing_variants), '\n'.join(missing_variant_strings))) != 'y':
            raise CommandError('Error: unable to find {} lifted-over variants'.format(len(missing_variants)))

    logger.info('Successfully lifted over {} variants'.format(len(es_variants)))

    # Update saved variants
    missing_family_count = 0
    for var in es_variants:
        saved_variant_models = saved_variants_map[(var['xpos'], var['ref'], var['alt'])]
        missing_saved_variants = [v for v in saved_variant_models if v.family.guid not in var['familyGuids']]
        if missing_saved_variants:
            variant_id = '{}-{}-{}-{}'.format(var['chrom'], var['pos'], var['ref'], var['alt'])
            # Typo fix: message previously read "not find for expected families".
            if raw_input(('Variant {} (hg37: {}) not found for expected families {}. Continue with update (y/n)? '.format(
                    variant_id, missing_saved_variants[0].xpos, ', '.join([
                        '{} ({})'.format(v.family.guid, v.guid) for v in missing_saved_variants
                    ])))) == 'y':
                # Re-fetch including all queried families so the saved json covers them.
                var = get_single_es_variant(
                    [v.family for v in saved_variant_models], variant_id, return_all_queried_families=True)
                missing_family_count += len(missing_saved_variants)
            else:
                raise CommandError('Error: unable to find family data for lifted over variant')
        for saved_variant in saved_variant_models:
            saved_variant.xpos_start = var['xpos']
            saved_variant.saved_variant_json = var
            saved_variant.save()

    logger.info('Successfully updated {} variants'.format(len(es_variants)))

    # Update project and sample data
    update_model_from_json(project, {'genome_version': GENOME_VERSION_GRCh38})
    reset_cached_search_results(project)

    logger.info('---Done---')
    # Typo fix: "Succesfully" -> "Successfully".
    logger.info(
        'Successfully lifted over {} variants. Skipped {} failed variants. Family data not updated for {} variants'.format(
            len(es_variants), len(missing_variants) + len(lift_failed), missing_family_count))
def add_variants_dataset_handler(request, project_guid):
    """Create or update Sample records for a variant callset loaded into an elasticsearch index.

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'ignoreMissingFamilyMembers': <Boolean>
            'mappingFilePath': <String>
        }

        Response body - json with 'samplesByGuid' for the updated sample records
    """
    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)
    request_json = json.loads(request.body)

    try:
        if 'elasticsearchIndex' not in request_json:
            raise ValueError('"elasticsearchIndex" is required')
        es_index = request_json['elasticsearchIndex'].strip()

        sample_ids, index_metadata = get_elasticsearch_index_samples(es_index)
        validate_index_metadata(index_metadata, project, es_index)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        # Optional file mapping ES sample ids to seqr individual ids.
        mapping_path = request_json.get('mappingFilePath')
        sample_id_to_individual_id_mapping = load_mapping_file(mapping_path) if mapping_path else {}

        samples_by_sample_id = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=es_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
        )

        unmatched_samples = set(sample_ids) - set(samples_by_sample_id.keys())
        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one must have matched.
            if not samples_by_sample_id:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)".format(
                        len(sample_ids)))
        elif unmatched_samples:
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'.format(
                    ", ".join(unmatched_samples)))

        if not request_json.get('ignoreMissingFamilyMembers'):
            # Check that every family in the callset has all of its loaded members present.
            family_to_individual_ids = defaultdict(set)
            for sample in samples_by_sample_id.values():
                family_to_individual_ids[sample.individual.family].add(sample.individual.individual_id)

            missing_summaries = []
            for family, individual_ids in family_to_individual_ids.items():
                absent = family.individual_set.filter(
                    sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
                    sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
                ).exclude(individual_id__in=individual_ids)
                if absent:
                    missing_summaries.append(
                        '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in absent])))
            if missing_summaries:
                raise Exception(
                    'The following families are included in the callset but are missing some family members: {}. This can lead to errors in variant search. If you still want to upload this callset, select the "Ignore missing family members" checkbox.'.format(
                        ', '.join(missing_summaries)))

        _update_samples(samples_by_sample_id, elasticsearch_index=es_index, dataset_path=dataset_path)

    except Exception as e:
        traceback.print_exc()
        return create_json_response({'errors': [e.message or str(e)]}, status=400)

    if not samples_by_sample_id:
        return create_json_response({'samplesByGuid': {}})

    update_project_from_json(project, {'has_new_search': True})
    reset_cached_search_results(project)

    _deprecated_update_vcfffiles(project, sample_type, es_index, dataset_path, samples_by_sample_id)

    return create_json_response(_get_samples_json(samples_by_sample_id, project_guid))
def add_variants_dataset_handler(request, project_guid):
    """Create or update Sample records for a variant callset loaded into an elasticsearch index.

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFilePath': <String>
        }

        Response body - json with 'samplesByGuid' and 'familiesByGuid' for the updated records
    """
    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)
    request_json = json.loads(request.body)

    try:
        if 'elasticsearchIndex' not in request_json:
            raise ValueError('"elasticsearchIndex" is required')
        es_index = request_json['elasticsearchIndex'].strip()

        sample_ids, index_metadata = get_elasticsearch_index_samples(es_index)
        validate_index_metadata(index_metadata, project, es_index)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        # Optional file mapping ES sample ids to seqr individual ids.
        mapping_path = request_json.get('mappingFilePath')
        sample_id_to_individual_id_mapping = load_mapping_file(mapping_path) if mapping_path else {}

        samples_by_sample_id = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=es_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
        )

        unmatched_samples = set(sample_ids) - set(samples_by_sample_id.keys())
        if request_json.get('ignoreExtraSamplesInCallset'):
            # Extra samples are tolerated, but at least one must have matched.
            if not samples_by_sample_id:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)".format(
                        len(sample_ids)))
        elif unmatched_samples:
            raise Exception(
                'Matches not found for ES sample ids: {}. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'.format(
                    ", ".join(unmatched_samples)))

        # Check that every family in the callset has all of its loaded members present.
        family_to_individual_ids = defaultdict(set)
        for sample in samples_by_sample_id.values():
            family_to_individual_ids[sample.individual.family].add(sample.individual.individual_id)

        missing_summaries = []
        for family, individual_ids in family_to_individual_ids.items():
            absent = family.individual_set.filter(
                sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
                sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
            ).exclude(individual_id__in=individual_ids)
            if absent:
                missing_summaries.append(
                    '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in absent])))
        if missing_summaries:
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'.format(
                    ', '.join(missing_summaries)))

        _update_samples(samples_by_sample_id, elasticsearch_index=es_index, dataset_path=dataset_path)

    except Exception as e:
        traceback.print_exc()
        return create_json_response({'errors': [e.message or str(e)]}, status=400)

    if not samples_by_sample_id:
        return create_json_response({'samplesByGuid': {}})

    update_project_from_json(project, {'has_new_search': True})
    reset_cached_search_results(project)

    update_xbrowse_vcfffiles(project, sample_type, es_index, dataset_path, samples_by_sample_id)

    # Families that were waiting for data now have it - flip them to "analysis in progress".
    families_to_update = [
        family for family in family_to_individual_ids.keys()
        if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA
    ]
    for family in families_to_update:
        update_model_from_json(family, {'analysis_status': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS})

    response_json = _get_samples_json(samples_by_sample_id, project_guid)
    response_json['familiesByGuid'] = {
        family.guid: {'analysisStatus': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS}
        for family in families_to_update
    }
    return create_json_response(response_json)
def handle(self, *args, **options):
    """Transfer a project's variant callset to an hg38 elasticsearch index.

    Validates the new index, matches its samples to the project, lifts over all
    saved variants from hg19 to hg38, and updates the project's genome version
    and sample records.

    Args:
        *args: unused positional args from the management command framework
        **options: parsed command options; reads 'project' (name or GUID) and 'es_index'

    Raises:
        Exception: on unmatched samples, missing family members, failed liftover
            coordinates, or family mismatches in the lifted variants.
    """
    project_arg = options['project']
    elasticsearch_index = options['es_index']

    project = Project.objects.get(Q(name=project_arg) | Q(guid=project_arg))
    logger.info('Updating project genome version for {}'.format(project.name))

    # Validate the provided index
    logger.info('Validating es index {}'.format(elasticsearch_index))
    sample_ids, index_metadata = get_elasticsearch_index_samples(elasticsearch_index)
    validate_index_metadata(index_metadata, project, elasticsearch_index, genome_version=GENOME_VERSION_GRCh38)
    sample_type = index_metadata['sampleType']
    dataset_path = index_metadata['sourceFilePath']

    matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
        project=project,
        sample_ids=sample_ids,
        sample_type=sample_type,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        elasticsearch_index=elasticsearch_index,
        sample_id_to_individual_id_mapping={},
    )

    unmatched_samples = set(sample_ids) - set(matched_sample_id_to_sample_record.keys())
    if len(unmatched_samples) > 0:
        raise Exception('Matches not found for ES sample ids: {}.'.format(', '.join(unmatched_samples)))

    # Check that every family in the callset has all of its loaded members present.
    included_family_individuals = defaultdict(set)
    individual_guids_by_id = {}
    for sample in matched_sample_id_to_sample_record.values():
        included_family_individuals[sample.individual.family].add(sample.individual.individual_id)
        individual_guids_by_id[sample.individual.individual_id] = sample.individual.guid
    missing_family_individuals = []
    for family, individual_ids in included_family_individuals.items():
        missing_indivs = family.individual_set.filter(
            sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
            sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
        ).exclude(individual_id__in=individual_ids)
        if missing_indivs:
            missing_family_individuals.append(
                '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in missing_indivs])))
    if missing_family_individuals:
        raise Exception(
            'The following families are included in the callset but are missing some family members: {}.'.format(
                ', '.join(missing_family_individuals)))

    # Get and clean up expected saved variants
    saved_variant_models_by_guid = {v.guid: v for v in SavedVariant.objects.filter(project=project)}
    deleted_no_family = set()
    deleted_no_tags = set()
    for guid, variant in saved_variant_models_by_guid.items():
        if not variant.family:
            deleted_no_family.add(guid)
        elif not (variant.varianttag_set.count() or variant.variantnote_set.count()):
            deleted_no_tags.add(guid)

    if deleted_no_family:
        if raw_input('Do you want to delete the following {} saved variants with no family (y/n)?: {} '.format(
                len(deleted_no_family), ', '.join(deleted_no_family))) == 'y':
            for guid in deleted_no_family:
                saved_variant_models_by_guid.pop(guid).delete()
            logger.info('Deleted {} variants'.format(len(deleted_no_family)))
    if deleted_no_tags:
        if raw_input('Do you want to delete the following {} saved variants with no tags (y/n)?: {} '.format(
                len(deleted_no_tags), ', '.join(deleted_no_tags))) == 'y':
            for guid in deleted_no_tags:
                saved_variant_models_by_guid.pop(guid).delete()
            logger.info('Deleted {} variants'.format(len(deleted_no_tags)))

    expected_families = {sv.family for sv in saved_variant_models_by_guid.values()}
    missing_families = expected_families - set(included_family_individuals.keys())
    if missing_families:
        raise Exception(
            'The following families have saved variants but are missing from the callset: {}.'.format(
                ', '.join([f.family_id for f in missing_families])))

    # Lift-over saved variants
    saved_variants = get_json_for_saved_variants(
        saved_variant_models_by_guid.values(), add_details=True, project=project,
        individual_guids_by_id=individual_guids_by_id)
    saved_variants_to_lift = [v for v in saved_variants if v['genomeVersion'] != GENOME_VERSION_GRCh38]

    num_already_lifted = len(saved_variants) - len(saved_variants_to_lift)
    if num_already_lifted:
        if raw_input('Found {} saved variants already on Hg38. Continue with liftover (y/n)?'.format(
                num_already_lifted)) != 'y':
            raise Exception('Error: found {} saved variants already on Hg38'.format(num_already_lifted))
    logger.info('Lifting over {} variants (skipping {} that are already lifted)'.format(
        len(saved_variants_to_lift), num_already_lifted))

    liftover_to_38 = LiftOver('hg19', 'hg38')
    hg37_to_hg38_xpos = {}
    lift_failed = set()
    for v in saved_variants_to_lift:
        # Each distinct hg37 xpos is converted at most once; failures are remembered too.
        if not (hg37_to_hg38_xpos.get(v['xpos']) or v['xpos'] in lift_failed):
            hg38_coord = liftover_to_38.convert_coordinate('chr{}'.format(v['chrom'].lstrip('chr')), int(v['pos']))
            if hg38_coord and hg38_coord[0]:
                hg37_to_hg38_xpos[v['xpos']] = get_xpos(hg38_coord[0][0], hg38_coord[0][1])
            else:
                lift_failed.add(v['xpos'])
    if lift_failed:
        # Bug fix: xpos values appear to be ints (get_xpos results elsewhere), so the
        # previous ', '.join(lift_failed) would raise TypeError while reporting the error.
        # str() is a no-op if they are already strings. TODO confirm xpos type upstream.
        raise Exception(
            'Unable to lift over the following {} coordinates: {}'.format(
                len(lift_failed), ', '.join([str(xpos) for xpos in lift_failed])))

    saved_variants_map = defaultdict(list)
    for v in saved_variants_to_lift:
        variant_model = saved_variant_models_by_guid[v['variantGuid']]
        saved_variants_map[(hg37_to_hg38_xpos[v['xpos']], v['ref'], v['alt'])].append(variant_model)

    es_variants = get_es_variants_for_variant_tuples(expected_families, saved_variants_map.keys())

    missing_variants = set(saved_variants_map.keys()) - {(v['xpos'], v['ref'], v['alt']) for v in es_variants}
    if missing_variants:
        missing_variant_strings = ['{}-{}-{} ({})'.format(
            xpos, ref, alt,
            ', '.join(['{}: {}'.format(v.family.family_id, v.guid) for v in saved_variants_map[(xpos, ref, alt)]]))
            for xpos, ref, alt in missing_variants]
        if raw_input('Unable to find the following {} variants in the index. Continue with update (y/n)?: {} '.format(
                len(missing_variants), ', '.join(missing_variant_strings))) != 'y':
            raise Exception('Error: unable to find {} lifted-over variants'.format(len(missing_variants)))

    logger.info('Successfully lifted over {} variants'.format(len(es_variants)))

    # Update saved variants
    for var in es_variants:
        saved_variant_models = saved_variants_map[(var['xpos'], var['ref'], var['alt'])]
        missing_families = [v.family.guid for v in saved_variant_models if v.family.guid not in var['familyGuids']]
        if missing_families:
            # Typo fix: message previously read "not find for expected families".
            raise Exception(
                'Error with variant {}:{}-{}-{} not found for expected families {}; found in families {}'.format(
                    var['chrom'], var['pos'], var['ref'], var['alt'],
                    ', '.join(missing_families), ', '.join(var['familyGuids'])))
        for saved_variant in saved_variant_models:
            saved_variant.xpos_start = var['xpos']
            saved_variant.saved_variant_json = json.dumps(var)
            saved_variant.save()

    logger.info('Successfully updated {} variants'.format(len(es_variants)))

    # Update project and sample data
    update_model_from_json(project, {'genome_version': GENOME_VERSION_GRCh38, 'has_new_search': True})
    _update_samples(
        matched_sample_id_to_sample_record, elasticsearch_index=elasticsearch_index, dataset_path=dataset_path)
    update_xbrowse_vcfffiles(
        project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record)
    reset_cached_search_results(project)

    logger.info('---Done---')
    # Typo fix: "Succesfully" -> "Successfully".
    logger.info('Successfully lifted over {} variants. Skipped {} failed variants.'.format(
        len(es_variants), len(missing_variants)))