def handle(self, *args, **options):
    """Delete and reload variant data for the requested families of a project.

    The project is looked up from ``options["project_id"]`` and the family ids
    to reload are the positional ``args``. Families whose id is not in ``args``
    are skipped, so calling with no family ids reloads nothing.
    """
    project_id = options["project_id"]
    family_ids = args
    project = Project.objects.get(project_id=project_id)

    # Family ids for which delete_family has already been called once, so a
    # family that shows up again later in the loop is not deleted a second time.
    already_deleted_once = set()
    for vcf_file, families in project.families_by_vcf().items():
        families_to_load = []
        for family in families:
            family_id = family.family_id
            print("Checking id: " + family_id)
            if not family_ids or family_id not in family_ids:
                continue

            # delete this family
            if family_id not in already_deleted_once:
                get_datastore(project_id).delete_family(project_id, family_id)
                already_deleted_once.add(family_id)

            families_to_load.append(family)

        if not families_to_load:
            # None of the requested families are in this VCF: do not invoke the
            # loader with an empty family list.
            continue

        # reload family
        print("Loading %(project_id)s %(families_to_load)s" % locals())
        xbrowse_controls.load_variants_for_family_list(project, families_to_load, vcf_file)
def get_data_status(self):
    """Return the variant-data status string for this cohort.

    Returns 'no_variants' when ``has_variant_data()`` is false, 'not_loaded'
    when the datastore reports that the cohort does not exist, and otherwise
    whatever status the datastore reports for it.
    """
    if not self.has_variant_data():
        return 'no_variants'

    project_id = self.project.project_id
    if get_datastore(project_id).family_exists(project_id, self.cohort_id):
        return get_datastore(project_id).get_family_status(project_id, self.cohort_id)
    return 'not_loaded'
def _deprecated_update_vcfffiles(project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record):
    """Point the project's VCFFile record at a dataset and attach it to individuals.

    Busts the datastore and results caches of the BaseProject linked to
    ``project``, reuses the newest matching VCFFile (or creates one), stores
    ``dataset_path`` and a loaded date on it, and adds it to every
    BaseIndividual linked to the individuals of the matched samples.

    Args:
        project: seqr project used to look up the BaseProject.
        sample_type: sample type stored on / used to find the VCFFile.
        elasticsearch_index: index name stored on / used to find the VCFFile.
        dataset_path: value written to ``VCFFile.file_path``.
        matched_sample_id_to_sample_record: mapping whose values are sample
            records exposing ``loaded_date`` and ``individual``.

    Raises:
        IndexError: if ``matched_sample_id_to_sample_record`` is empty.
    """
    base_project = BaseProject.objects.get(seqr_project=project)
    get_datastore(base_project).bust_project_cache(base_project.project_id)
    clear_project_results_cache(base_project.project_id)

    # Most recently created VCFFile for this project / sample type / index, if any.
    vcf_file = VCFFile.objects.filter(
        project=base_project,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_type=sample_type,
        elasticsearch_index=elasticsearch_index).order_by('-pk').first()

    if not vcf_file:
        vcf_file = VCFFile.objects.create(
            project=base_project,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            sample_type=sample_type,
            elasticsearch_index=elasticsearch_index,
        )
        logger.info("Created vcf file: " + str(vcf_file.__dict__))

    # Materialize once: dict views cannot be indexed on Python 3, and the
    # records are needed again below.
    sample_records = list(matched_sample_id_to_sample_record.values())

    vcf_file.file_path = dataset_path
    vcf_file.loaded_date = sample_records[0].loaded_date
    vcf_file.save()

    for indiv in [s.individual for s in sample_records]:
        for base_indiv in BaseIndividual.objects.filter(seqr_individual=indiv).only('id'):
            base_indiv.vcf_files.add(vcf_file)
def handle(self, *args, **options):
    """Delete and reload variant data for the requested families of a project.

    Reads the project id from ``options["project_id"]`` and the family ids to
    reload from ``options["family_ids"]``. Families whose id is not listed are
    skipped, so an empty ``family_ids`` reloads nothing.
    """
    project_id = options["project_id"]
    family_ids = options["family_ids"]
    project = Project.objects.get(project_id=project_id)

    # Family ids for which delete_family has already been called once, so the
    # same family is not deleted again when it is seen under another VCF entry.
    already_deleted_once = set()
    for vcf_file, families in project.families_by_vcf().items():
        families_to_load = []
        for family in families:
            family_id = family.family_id
            print("Checking id: " + family_id)
            if not family_ids or family_id not in family_ids:
                continue

            # delete this family
            if family_id not in already_deleted_once:
                get_datastore(project_id).delete_family(project_id, family_id)
                already_deleted_once.add(family_id)

            families_to_load.append(family)

        if not families_to_load:
            # No requested family belongs to this VCF; skip the loader call
            # rather than invoking it with an empty list.
            continue

        # reload family
        print("Loading %(project_id)s %(families_to_load)s" % {
            "project_id": project_id,
            "families_to_load": families_to_load,
        })
        xbrowse_controls.load_variants_for_family_list(project, families_to_load, vcf_file)
def update_xbrowse_vcfffiles(project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record):
    """Point the xbrowse VCFFile record at a dataset and attach it to individuals.

    Busts the datastore and results caches of the xbrowse project returned by
    ``find_matching_xbrowse_model(project)``, reuses the newest matching
    VCFFile (or creates one), stores ``dataset_path`` and a loaded date on it,
    and adds it to every BaseIndividual whose ``seqr_individual_id`` is among
    the matched samples' ``individual_id`` values.

    Args:
        project: seqr project passed to ``find_matching_xbrowse_model``.
        sample_type: sample type stored on / used to find the VCFFile.
        elasticsearch_index: index name stored on / used to find the VCFFile.
        dataset_path: value written to ``VCFFile.file_path``.
        matched_sample_id_to_sample_record: mapping whose values are sample
            records exposing ``loaded_date`` and ``individual_id``.

    Raises:
        IndexError: if ``matched_sample_id_to_sample_record`` is empty.
    """
    base_project = find_matching_xbrowse_model(project)
    get_datastore(base_project).bust_project_cache(base_project.project_id)
    clear_project_results_cache(base_project.project_id)

    # Most recently created VCFFile for this project / sample type / index, if any.
    vcf_file = VCFFile.objects.filter(
        project=base_project,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_type=sample_type,
        elasticsearch_index=elasticsearch_index).order_by('-pk').first()

    if not vcf_file:
        vcf_file = VCFFile.objects.create(
            project=base_project,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            sample_type=sample_type,
            elasticsearch_index=elasticsearch_index,
        )
        logging.info("Created vcf file: " + str(vcf_file.__dict__))

    # Materialize once: dict views cannot be indexed on Python 3, and the
    # records are needed again for the individual lookup.
    sample_records = list(matched_sample_id_to_sample_record.values())

    vcf_file.file_path = dataset_path
    vcf_file.loaded_date = sample_records[0].loaded_date
    vcf_file.save()

    base_individuals = BaseIndividual.objects.filter(
        seqr_individual_id__in=[s.individual_id for s in sample_records]
    )
    for base_indiv in base_individuals:
        base_indiv.vcf_files.add(vcf_file)
def handle(self, *args, **options):
    """Delete and reload variant data for the requested families of a project.

    For every VCF of the project, the families named in
    ``options["family_ids"]`` have their datastore data deleted (once per
    family) and are then reloaded from that VCF. VCFs without any requested
    family are reported and skipped.
    """
    project_id = options["project_id"]
    requested_ids = options["family_ids"]
    project = Project.objects.get(project_id=project_id)

    # ids whose variant data has already been deleted during this run
    purged_ids = set()

    for vcf_file, families in project.families_by_vcf().items():
        print("Checking families %s in vcf %s" % (families, vcf_file))

        families_to_load = []
        for family in families:
            family_id = family.family_id
            if not (requested_ids and family_id in requested_ids):
                continue

            print("Processing family: " + family_id)

            if family_id not in purged_ids:
                print("Deleting variant data for family: " + family_id)
                get_datastore(project).delete_family(project_id, family_id)
                purged_ids.add(family_id)

            families_to_load.append(family)

        if not families_to_load:
            print("0 matching families found in this VCF")
            continue

        print("Loading %(project_id)s %(families_to_load)s" % {
            "project_id": project_id,
            "families_to_load": families_to_load,
        })
        xbrowse_controls.load_variants_for_family_list(project, families_to_load, vcf_file)

    print("Finished.")
def calculate_mendelian_variant_search(search_spec, xfamily):
    """Run the family variant search selected by ``search_spec.search_mode``.

    Returns the matching variants as a list, or None when the search mode is
    not one of the modes handled below.
    """
    mode = search_spec.search_mode

    if mode == 'standard_inheritance':
        return list(get_variants_with_inheritance_mode(
            get_mall(),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'custom_inheritance':
        return list(get_variants_family(
            get_datastore(),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'gene_burden':
        # genes are found first, then flattened back into a variant stream
        gene_stream = get_genes_family(
            get_datastore(),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        variant_stream = stream_utils.gene_stream_to_variant_stream(gene_stream, get_reference())
        return list(variant_stream)

    if mode == 'allele_count':
        return list(get_variants_allele_count(
            get_datastore(),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'all_variants':
        return list(get_variants_family(
            get_datastore(),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    return None
def update_pop_freqs_in_family_tables(self):
    """Rewrite the ``db_freqs.*`` fields of every variant in every family collection.

    Work is tracked in a local SQLite file
    (``reference_populations_family_tables.db``) holding one row per
    (project_id, family_id) with ``started`` / ``finished`` flags. Rows already
    marked as started are not picked up again, including on a later run
    against the same file.
    """
    # Load family tables
    population_frequency_store = mall.get_annotator().get_population_frequency_store()

    # isolation_level=None: sqlite3 autocommit mode, each statement is persisted immediately
    db = sqlite3.connect("reference_populations_family_tables.db", isolation_level=None)
    try:
        db.execute(
            "CREATE TABLE if not exists all_projects(project_id varchar(200), family_id varchar(200), started bool, finished bool)"
        )
        db.execute("CREATE UNIQUE INDEX IF NOT EXISTS all_projects_idx ON all_projects(project_id, family_id)")

        # Register every (project, family) pair; INSERT OR IGNORE leaves
        # existing rows (and their flags) untouched.
        for project in Project.objects.all().order_by("-last_accessed_date"):
            project_id = project.project_id
            datastore = get_datastore(project_id)
            for family_info in datastore._get_family_info(project_id):
                family_id = family_info["family_id"]
                db.execute("INSERT OR IGNORE INTO all_projects VALUES (?, ?, 0, 0)", (project_id, family_id))

        population_slugs_to_load = [
            population_spec["slug"] for population_spec in annotator_settings.reference_populations_to_load
        ]

        while True:
            # Not-yet-started rows in random order; the first one is processed.
            remaining_work = list(
                db.execute("SELECT project_id, family_id FROM all_projects WHERE started=0 ORDER BY RANDOM()")
            )
            print("%d projects / families remaining" % len(remaining_work))
            if not remaining_work:
                print("Done with all projects/families")
                break

            project_id, family_id = remaining_work[0]
            datastore = get_datastore(project_id)
            print(" updating %s / %s" % (project_id, family_id))
            db.execute("UPDATE all_projects SET started=1 WHERE project_id=? AND family_id=?", (project_id, family_id))

            family_collection = datastore._get_family_collection(project_id, family_id)
            for variant_dict in family_collection.find():
                freqs = population_frequency_store.get_frequencies(
                    variant_dict["xpos"], variant_dict["ref"], variant_dict["alt"]
                )
                # populations missing from the store are written as frequency 0
                full_freqs = {
                    "db_freqs." + population_slug: freqs.get(population_slug, 0)
                    for population_slug in population_slugs_to_load
                }
                family_collection.update(
                    {"xpos": variant_dict["xpos"], "ref": variant_dict["ref"], "alt": variant_dict["alt"]},
                    {"$set": full_freqs},
                    upsert=False,
                )

            print(" ---> done updating project_id: %s, family_id: %s" % (project_id, family_id))
            db.execute("UPDATE all_projects SET finished=1 WHERE project_id=? AND family_id=?", (project_id, family_id))
    finally:
        # release the SQLite file handle even if a datastore call fails mid-run
        db.close()
def get_data_status(self):
    """Return the variant-data status string for this cohort.

    Returns "loaded" when ``get_elasticsearch_dataset`` returns something for
    the project, 'no_variants' when ``has_variant_data()`` is false,
    'not_loaded' when the datastore reports that the cohort does not exist,
    and otherwise the status reported by the datastore.
    """
    project_id = self.project.project_id

    if get_elasticsearch_dataset(project_id) is not None:
        return "loaded"

    if not self.has_variant_data():
        return 'no_variants'

    if get_datastore(project_id).family_exists(project_id, self.cohort_id):
        return get_datastore(project_id).get_family_status(project_id, self.cohort_id)
    return 'not_loaded'
def calculate_mendelian_variant_search(search_spec, xfamily):
    """Run the family variant search selected by ``search_spec.search_mode``.

    A trace line describing the search is written to stderr first. Returns the
    matching variants as a list, or None when the search mode is not one of
    the modes handled below.
    """
    mode = search_spec.search_mode
    project_id = xfamily.project_id
    sys.stderr.write(" mendelian_variant_search for %s - search mode: %s %s\n" % (
        project_id, mode, search_spec.__dict__))

    if mode == 'standard_inheritance':
        return list(get_variants_with_inheritance_mode(
            get_mall(xfamily.project_id),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'custom_inheritance':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'gene_burden':
        # genes are found first, then flattened back into a variant stream
        gene_stream = get_genes_family(
            get_datastore(xfamily.project_id),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        )
        variant_stream = stream_utils.gene_stream_to_variant_stream(gene_stream, get_reference())
        return list(variant_stream)

    if mode == 'allele_count':
        return list(get_variants_allele_count(
            get_datastore(xfamily.project_id),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'all_variants':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            indivs_to_consider=xfamily.indiv_id_list(),
        ))

    return None
def handle(self, *args, **options):
    """Rename individuals of a project according to a tab-separated mapping file.

    ``args[0]`` is the project id and ``args[1]`` the path of a file with one
    ``old_id<TAB>new_id`` pair per line. Each individual is renamed and, when
    it belongs to a family, that family's data is deleted from the datastore.
    """
    project = Project.objects.get(project_id=args[0])

    # Close the mapping file deterministically and ignore blank lines, which
    # would otherwise make dict() fail on a one-element pair.
    with open(args[1]) as sample_map_file:
        sample_map = dict(
            line.strip('\n').split('\t')
            for line in sample_map_file
            if line.strip()
        )

    for old_id, new_id in sample_map.items():
        # change actual IDs
        indiv = Individual.objects.get(project=project, indiv_id=old_id)
        indiv.indiv_id = new_id
        indiv.save()

        # datastore
        if indiv.family:
            mall.get_datastore().delete_family(project.project_id, indiv.family.family_id)
def look_up_individual_loaded_date(source_individual, earliest_loaded_date=False):
    """Retrieve the data-loaded time for the given individual.

    The time is read from the ``generation_time`` of the ``_id`` of a record in
    the family's datastore collection, falling back to the family-info record
    when the collection has no record. Returns None when the datastore has no
    family collection or when any step of the lookup fails (failures are logged).
    """
    loaded_date = None
    try:
        datastore = get_datastore(source_individual.project)
        family_id = source_individual.family.family_id
        project_id = source_individual.project.project_id
        if earliest_loaded_date:
            # look at the "<project_id>_previous1" copy instead
            project_id += "_previous1"

        if hasattr(datastore, '_get_family_collection'):
            family_collection = datastore._get_family_collection(project_id, family_id)
        else:
            family_collection = None

        if not family_collection:
            return loaded_date

        dated_record = family_collection.find_one()
        if not dated_record:
            dated_record = datastore._get_family_info(project_id, family_id)
        loaded_date = dated_record['_id'].generation_time
    except Exception as e:
        logger.error('Unable to look up loaded_date for %s' % (source_individual,))
        logger.error(e)

    return loaded_date
def look_up_individual_loaded_date(source_individual, earliest_loaded_date=False):
    """Retrieve the data-loaded time for the given individual.

    The time is read from the ``generation_time`` of the ``_id`` of a record in
    the family's datastore collection, falling back to the family-info record
    when the collection has no record.

    Args:
        source_individual: individual exposing ``project`` and ``family``.
        earliest_loaded_date: when true, the lookup uses the project id with a
            "_previous1" suffix.

    Returns:
        The generation time, or None when the datastore has no family
        collection or the lookup fails (failures are logged, not raised).
    """
    # decode data loaded time
    loaded_date = None
    try:
        datastore = get_datastore(source_individual.project)

        family_id = source_individual.family.family_id
        project_id = source_individual.project.project_id
        if earliest_loaded_date:
            project_id += "_previous1"  # add suffix

        # Datastores that do not expose per-family collections have no loaded
        # date to report; treat that as "not found" instead of letting the
        # AttributeError be logged as an error on every lookup.
        get_family_collection = getattr(datastore, '_get_family_collection', None)
        if get_family_collection is None:
            return loaded_date

        family_collection = get_family_collection(project_id, family_id)
        if not family_collection:
            return loaded_date

        record = family_collection.find_one()
        if record:
            loaded_date = record['_id'].generation_time
            logger.info("%s data-loaded date: %s" % (project_id, loaded_date))
        else:
            family_info_record = datastore._get_family_info(project_id, family_id)
            loaded_date = family_info_record['_id'].generation_time
    except Exception as e:
        logger.error('Unable to look up loaded_date for %s' % (source_individual,))
        logger.error(e)

    return loaded_date
def get_variants_from_variant_tuples(project, variant_tuples, user=None):
    """Fetch the variants for ``(xpos, ref, alt, family_id)`` tuples.

    Tuples are grouped by family and fetched with one datastore call per
    family. A tuple the datastore has no variant for yields a freshly created,
    annotated Variant flagged with the ``created_variant`` extra. Every
    returned variant carries ``family_id`` and ``project_id`` extras.
    """
    datastore = get_datastore(project)
    population_slugs = project.get_reference_population_slugs()

    tuples_by_family = {}
    for xpos, ref, alt, family_id in variant_tuples:
        tuples_by_family.setdefault(family_id, []).append((xpos, ref, alt))

    variants = []
    for family_id, family_tuples in tuples_by_family.items():
        fetched = datastore.get_multiple_variants(
            project.project_id, family_id, family_tuples, user=user
        )
        for (xpos, ref, alt), variant in zip(family_tuples, fetched):
            if not variant:
                # not in the datastore: build a stand-in and annotate it
                variant = Variant(xpos, ref, alt)
                get_annotator().annotate_variant(variant, population_slugs)
                variant.set_extra('created_variant', True)

            variant.set_extra('family_id', family_id)
            variant.set_extra('project_id', project.project_id)
            variants.append(variant)

    return variants
def get_saved_variants_for_family(family):
    """Look up the variants that were flagged (saved) in this family.

    Returns:
        List of variants that were saved in this family
        List of variant tuples where no variants were in the datastore

    Both lists follow the order of the family's search flags, most recently
    saved first, with duplicate (xpos, ref, alt) tuples removed.
    """
    search_flags = FamilySearchFlag.objects.filter(
        family=family).order_by('-date_saved')

    # De-duplicate while keeping the query's ordering. A list (rather than a
    # set) also makes the pairing with the datastore results below independent
    # of set iteration order.
    seen = set()
    variant_tuples = []
    for flag in search_flags:
        variant_t = (flag.xpos, flag.ref, flag.alt)
        if variant_t not in seen:
            seen.add(variant_t)
            variant_tuples.append(variant_t)

    found = get_datastore(family.project).get_multiple_variants(
        family.project.project_id,
        family.family_id,
        variant_tuples,
    )

    variants = []
    couldntfind = []
    for variant_t, variant in zip(variant_tuples, found):
        if variant:
            variants.append(variant)
        else:
            couldntfind.append(variant_t)

    return variants, couldntfind
def get_saved_variants_for_family(family):
    """Look up the variants that were flagged (saved) in this family.

    Returns:
        List of variants that were saved in this family
        List of variant tuples where no variants were in the datastore

    Both lists follow the order of the family's search flags, most recently
    saved first, with duplicate (xpos, ref, alt) tuples removed.
    """
    search_flags = FamilySearchFlag.objects.filter(family=family).order_by('-date_saved')

    # De-duplicate while keeping the query's ordering (a plain set would
    # discard the order_by above).
    seen = set()
    variant_tuples = []
    for flag in search_flags:
        variant_t = (flag.xpos, flag.ref, flag.alt)
        if variant_t not in seen:
            seen.add(variant_t)
            variant_tuples.append(variant_t)

    variants = []
    couldntfind = []
    if not variant_tuples:
        # nothing flagged: no datastore lookup needed
        return variants, couldntfind

    # The datastore is the same for every variant; resolve it once.
    project_id = family.project.project_id
    datastore = get_datastore(project_id)

    for variant_t in variant_tuples:
        xpos, ref, alt = variant_t
        variant = datastore.get_single_variant(project_id, family.family_id, xpos, ref, alt)
        if variant:
            variants.append(variant)
        else:
            couldntfind.append(variant_t)

    return variants, couldntfind
def get_variants_from_variant_tuples(project, variant_tuples, user=None):
    """Resolve ``(xpos, ref, alt, family_id)`` tuples into variant objects.

    The datastore is queried once per family. Where it has no variant for a
    tuple, a new annotated Variant is created and marked with the
    ``created_variant`` extra. All returned variants get ``family_id`` and
    ``project_id`` extras.
    """
    datastore = get_datastore(project)
    population_slugs = project.get_reference_population_slugs()

    # family_id -> [(xpos, ref, alt), ...] in input order
    grouped = {}
    for xpos, ref, alt, family_id in variant_tuples:
        if family_id in grouped:
            grouped[family_id].append((xpos, ref, alt))
        else:
            grouped[family_id] = [(xpos, ref, alt)]

    variants = []
    for family_id, tuples_for_family in grouped.items():
        variants_for_family = datastore.get_multiple_variants(
            project.project_id, family_id, tuples_for_family, user=user)

        for position, variant in zip(tuples_for_family, variants_for_family):
            if not variant:
                xpos, ref, alt = position
                variant = Variant(xpos, ref, alt)
                get_annotator().annotate_variant(variant, population_slugs)
                variant.set_extra('created_variant', True)
            variant.set_extra('family_id', family_id)
            variant.set_extra('project_id', project.project_id)
            variants.append(variant)

    return variants
def family_variant_view(request, project_id, family_id):
    """Render the page for a single variant in a family.

    The variant is identified by the ``xpos``, ``ref`` and ``alt`` query
    parameters. Responds with 'Invalid View' when ``xpos`` is missing or not
    an integer.

    Raises:
        PermissionDenied: if the user cannot view the project.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    # Only the int() conversion can fail here: TypeError when xpos is absent
    # (None), ValueError when it is not numeric. Anything else should propagate.
    try:
        xpos = int(request.GET.get('xpos'))
    except (TypeError, ValueError):
        return HttpResponse('Invalid View')
    ref = request.GET.get('ref')
    alt = request.GET.get('alt')

    # NOTE(review): other callers in this codebase treat a None result from
    # get_single_variant as "not found"; that case is not handled here - confirm.
    variant = get_datastore(project).get_single_variant(
        project_id, family_id, xpos, ref, alt)
    add_extra_info_to_variants_project(get_reference(), project, [variant],
                                       add_family_tags=True, add_populations=True)

    return render(
        request, 'family/family_variant_view.html', {
            'project': project,
            'family': family,
            'variant_json': json.dumps(variant.toJSON()),
        })
def _set_saved_variant_json(new_variant_tag_or_note, source_variant_tag_or_note, new_family): if new_family is None: return project_id = new_family.project.deprecated_project_id project = Project.objects.get(project_id=project_id) try: variant_info = get_datastore(project).get_single_variant( project_id, new_family.family_id, source_variant_tag_or_note.xpos, source_variant_tag_or_note.ref, source_variant_tag_or_note.alt) except Exception as e: logger.error("Unable to retrieve variant annotations for %s %s: %s" % ( new_family, source_variant_tag_or_note, e)) return if variant_info: add_extra_info_to_variants_project(get_reference(), project, [variant_info], add_family_tags=True, add_populations=True) variant_json = variant_info.toJSON() new_variant_tag_or_note.saved_variant_json = json.dumps(variant_json) new_variant_tag_or_note.save()
def add_family_search_flag(request):
    """Save a FamilySearchFlag for a variant and return the variant as JSON.

    All of ``project_id``, ``family_id``, ``xpos``, ``ref``, ``alt``, ``note``,
    ``flag_type`` and ``flag_inheritance_mode`` must be present in the query
    string; otherwise an ``is_error`` response naming the last missing key is
    returned and nothing is saved.

    Raises:
        PermissionDenied: if the user cannot edit the project.
    """
    error = None
    for key in [
            'project_id', 'family_id', 'xpos', 'ref', 'alt', 'note',
            'flag_type', 'flag_inheritance_mode'
    ]:
        if request.GET.get(key) is None:
            error = "%s is requred" % key

    if error:
        return JSONResponse({
            'is_error': True,
            'error': error,
        })

    project = get_object_or_404(Project, project_id=request.GET.get('project_id'))
    family = get_object_or_404(Family, project=project, family_id=request.GET.get('family_id'))
    if not project.can_edit(request.user):
        raise PermissionDenied

    xpos = int(request.GET['xpos'])
    ref = request.GET.get('ref')
    alt = request.GET['alt']
    note = request.GET.get('note')
    flag_type = request.GET.get('flag_type')
    flag_inheritance_mode = request.GET.get('flag_inheritance_mode')

    # todo: more validation - is variant valid?
    flag = FamilySearchFlag(
        user=request.user,
        family=family,
        xpos=xpos,
        ref=ref,
        alt=alt,
        note=note,
        flag_type=flag_type,
        suggested_inheritance=flag_inheritance_mode,
        date_saved=timezone.now(),
    )
    flag.save()

    variant = get_datastore(project.project_id).get_single_variant(
        family.project.project_id, family.family_id, xpos, ref, alt)
    api_utils.add_extra_info_to_variant(get_reference(), family, variant)

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
def edit_family_cause(request, project_id, family_id):
    """View for editing the causal variants of a family.

    A ``variant`` query parameter of the form ``xpos|ref|alt`` adds that
    variant to the family's causal variants. A valid POST replaces the causal
    variants with the posted ``variants`` list, updates the family via
    ``update_xbrowse_model`` and redirects to the family home page. Otherwise
    the edit page is rendered with the current causal variants.

    Raises:
        PermissionDenied: if the user is not an admin of the project.
    """
    error = None

    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_admin(request.user):
        raise PermissionDenied

    causal_variants = list(CausalVariant.objects.filter(family=family))

    if request.GET.get('variant'):
        xpos, ref, alt = request.GET['variant'].split('|')
        CausalVariant.objects.get_or_create(
            family=family,
            xpos=int(xpos),
            ref=ref,
            alt=alt,
        )
        # re-read so the variant just added is included
        causal_variants = list(CausalVariant.objects.filter(family=family))

    if request.method == 'POST':
        form = EditFamilyCauseForm(family, request.POST)
        if form.is_valid():
            CausalVariant.objects.filter(family=family).delete()
            for v_str in request.POST.getlist('variants'):
                xpos, ref, alt = v_str.split('|')
                CausalVariant.objects.create(
                    family=family,
                    xpos=int(xpos),
                    ref=ref,
                    alt=alt,
                )
                update_xbrowse_model(family,
                                     analysis_status='S',
                                     causal_inheritance_mode=form.cleaned_data['inheritance_mode'])
            return redirect('family_home', project_id=project.project_id, family_id=family.family_id)
        else:
            error = server_utils.form_error_string(form)
    else:
        # NOTE(review): the POST branch uses EditFamilyCauseForm; confirm that
        # EditFamilyForm is the intended form class here.
        form = EditFamilyForm(family)

    # The datastore can return None for a variant it no longer has; skip those
    # so rendering does not fail on None.toJSON().
    variants = []
    for causal_variant in causal_variants:
        variant = get_datastore(project).get_single_variant(
            project_id, family_id, causal_variant.xpos, causal_variant.ref, causal_variant.alt)
        if variant is not None:
            variants.append(variant)

    return render(
        request, 'family/edit_cause.html', {
            'project': project,
            'family': family,
            'error': error,
            'form': form,
            'variants': [v.toJSON() for v in variants],
        })
def transfer_project(self, from_project_id, destination_project_id):
    """Re-label the datastore records of one project with another project's id.

    Steps, in order:
      1. Check that every individual id of the 'from' project also exists in
         the destination project.
      2. Add the 'from' project's VCF files to the destination project.
      3. Require exactly one project record and at least one family record for
         the 'from' project in the datastores.
      4. Re-label any existing destination records as
         ``<destination_project_id>_previous`` (version = today's date), then
         re-label the 'from' records with the destination id (version '2').
      5. Re-check the destination records and update family analysis status.
      6. Interactively offer to delete the 'from' project.

    Raises:
        Exception: if individuals are missing from the destination project.
        ValueError: if one of the before/after datastore checks fails.
    """
    print("From: " + from_project_id)
    print("To: " + destination_project_id)

    from_project = Project.objects.get(project_id=from_project_id)
    destination_project = Project.objects.get(project_id=destination_project_id)

    # Make sure individuals are the same
    indivs_missing_from_dest_project = (set(
        [i.indiv_id for i in Individual.objects.filter(project=from_project)]) - set(
        [i.indiv_id for i in Individual.objects.filter(project=destination_project)]))
    if indivs_missing_from_dest_project:
        raise Exception("Individuals missing from dest project: " + str(indivs_missing_from_dest_project))

    # update VCFs
    vcfs = from_project.families_by_vcf().keys()
    for vcf_file_path in vcfs:
        vcf_file = VCFFile.objects.get_or_create(file_path=os.path.abspath(vcf_file_path))[0]
        sample_management.add_vcf_file_to_project(destination_project, vcf_file)
        print("Added %s to project %s" % (vcf_file, destination_project.project_id))

    # Underlying database handles of the family and project datastores.
    families_db = get_datastore()._db
    projects_db = get_project_datastore()._db

    print("==========")
    print("Checking 'from' Projects and Families:")
    if not check_that_exists(projects_db.projects, {'project_id': from_project_id}, not_more_than_one=True):
        raise ValueError("There needs to be 1 project db in %(from_project_id)s" % locals())
    if not check_that_exists(families_db.families, {'project_id': from_project_id}, not_more_than_one=False):
        raise ValueError("There needs to be atleast 1 family db in %(from_project_id)s" % locals())

    print("==========")
    print("Make Updates:")
    datestamp = datetime.now().strftime("%Y-%m-%d")
    # Move any records already stored under the destination id out of the way first.
    if check_that_exists(projects_db.projects, {'project_id': destination_project_id}, not_more_than_one=True):
        result = update(projects_db.projects, {'project_id': destination_project_id}, {'project_id': destination_project_id+'_previous', 'version': datestamp})
    if check_that_exists(families_db.families, {'project_id': destination_project_id}, not_more_than_one=False):
        result = update(families_db.families, {'project_id': destination_project_id}, {'project_id': destination_project_id+'_previous', 'version': datestamp})

    # Then re-label the 'from' records with the destination project id.
    result = update(projects_db.projects, {'project_id': from_project_id}, {'project_id': destination_project_id, 'version': '2'})
    result = update(families_db.families, {'project_id': from_project_id}, {'project_id': destination_project_id, 'version': '2'})

    print("==========")
    print("Checking Projects:")
    if not check_that_exists(projects_db.projects, {'project_id': destination_project_id}, not_more_than_one=True):
        raise ValueError("After: There needs to be 1 project db in %(destination_project_id)s" % locals())
    if not check_that_exists(families_db.families, {'project_id': destination_project_id}, not_more_than_one=False):
        raise ValueError("After: There needs to be atleast 1 family db in %(destination_project_id)s" % locals())

    update_family_analysis_status(destination_project_id)

    print("Data transfer finished.")
    # Only an exact 'Y' answer deletes the 'from' project.
    i = raw_input("Delete the 'from' project: %s? [Y/n] " % from_project_id)
    if i.strip() == 'Y':
        sample_management.delete_project(from_project_id)
        print("Project %s deleted" % from_project_id)
    else:
        print("Project not deleted")
def handle(self, *args, **options):
    """Aggregate tagged variants by gene and print the per-gene summaries.

    First indexes every GeneList by name and by gene id on ``self``. Then, for
    each VariantTag, fetches the tagged variant from the datastore and records
    per (gene_id, gene_name) key the projects, variants, families and tag
    names seen. ``self.print_out`` is called with the accumulated mappings
    during the run and once more at the end.
    """
    #genomicFeatures section
    self.all_gene_lists = defaultdict(set)
    self.gene_to_gene_lists = defaultdict(set)
    for gene_list in GeneList.objects.all():
        print('gene list: [%s]' % gene_list.name)
        self.all_gene_lists[gene_list.name] = set(
            g.gene_id for g in gene_list.genelistitem_set.all())
        for g in gene_list.genelistitem_set.all():
            self.gene_to_gene_lists[g.gene_id].add(gene_list.name)

    print("starting... ")
    gene_to_projects = defaultdict(set)
    gene_to_variants = defaultdict(set)
    gene_to_families = defaultdict(set)
    gene_to_variant_tags = defaultdict(set)

    Key = namedtuple('Key', 'gene_id, gene_name')
    # number of variant tags seen per project id
    project_ids = defaultdict(int)
    for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
        project_tag = variant_tag.project_tag
        project_id = project_tag.project.project_id
        project_ids[project_id] += 1
        tag_name = project_tag.tag.lower()

        variant = get_datastore(project_id).get_single_variant(
            project_id,
            variant_tag.family.family_id,
            variant_tag.xpos,
            variant_tag.ref,
            variant_tag.alt,
        )

        # print(gene_to_projects)
        if variant is None:
            # The datastore has no variant for this tag; skip it.
            #print("Variant %s no longer called in this family (did the callset version change?)" % (variant_tag.toJSON()))
            continue

        #print(project_id,variant.toJSON()['gene_ids'])
        if variant.gene_ids is not None:
            for gene_id in variant.gene_ids:
                gene_name = get_reference().get_gene_symbol(gene_id)
                key = Key._make([gene_id, gene_name])
                # variants are recorded as "chr-pos-ref-alt" strings
                gene_to_variants[key].add(
                    "%s-%s-%s-%s" % (variant.chr, variant.pos, variant.ref, variant.alt))
                if variant_tag.family:
                    gene_to_families[key].add(variant_tag.family.family_id)
                gene_to_variant_tags[key].add(tag_name)
                gene_to_projects[key].add(project_id.lower())

        # Intermediate output whenever the number of genes seen is a multiple of 50.
        if len(gene_to_projects) % 50 == 0:
            self.print_out(gene_to_projects, gene_to_families, gene_to_variants, gene_to_variant_tags)

    self.print_out(gene_to_projects, gene_to_families, gene_to_variants, gene_to_variant_tags)
def handle(self, *args, **options):
    """Aggregate tagged variants by gene and print the per-gene summaries.

    First indexes every GeneList by name and by gene id on ``self``. Then, for
    each VariantTag, fetches the tagged variant from the datastore and records
    per (gene_id, gene_name) key the projects, variants, families, tag names
    and, per variant, the family ids seen. ``self.print_out`` is called with
    the accumulated mappings during the run and once more at the end.
    """
    #genomicFeatures section
    self.all_gene_lists = defaultdict(set)
    self.gene_to_gene_lists = defaultdict(set)
    for gene_list in GeneList.objects.all():
        print('gene list: [%s]' % gene_list.name)
        self.all_gene_lists[gene_list.name] = set(g.gene_id for g in gene_list.genelistitem_set.all())
        for g in gene_list.genelistitem_set.all():
            self.gene_to_gene_lists[g.gene_id].add(gene_list.name)

    print("starting... ")
    gene_to_projects = defaultdict(set)
    gene_to_variants = defaultdict(set)
    gene_to_families = defaultdict(set)
    gene_to_variant_tags = defaultdict(set)
    # key -> variant id -> set of family ids
    gene_to_variant_and_families = defaultdict(lambda: defaultdict(set))

    Key = namedtuple('Key', 'gene_id, gene_name')
    # number of variant tags seen per project id
    project_ids = defaultdict(int)
    for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
        project_tag = variant_tag.project_tag
        project_id = project_tag.project.project_id
        project_ids[project_id] += 1
        tag_name = project_tag.tag.lower()

        variant = get_datastore(project_tag.project).get_single_variant(
            project_id,
            variant_tag.family.family_id,
            variant_tag.xpos,
            variant_tag.ref,
            variant_tag.alt,
        )

        # print(gene_to_projects)
        if variant is None:
            # The datastore has no variant for this tag; skip it.
            #print("Variant %s no longer called in this family (did the callset version change?)" % (variant_tag.toJSON()))
            continue

        #print(project_id,variant.toJSON()['gene_ids'])
        if variant.gene_ids is not None:
            for gene_id in variant.gene_ids:
                gene_name = get_reference().get_gene_symbol(gene_id)
                key = Key._make([gene_id, gene_name])
                # variants are identified by a "chr-pos-ref-alt" string
                variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos, variant.ref, variant.alt)
                gene_to_variants[key].add(variant_id)
                if variant_tag.family:
                    # note: stores the family object here, while the per-variant map below stores family_id
                    gene_to_families[key].add(variant_tag.family)
                gene_to_variant_tags[key].add(tag_name)
                gene_to_projects[key].add(project_id.lower())
                gene_to_variant_and_families[key][variant_id].add(variant_tag.family.family_id)

        # Intermediate output whenever the number of genes seen is a multiple of 50.
        if len(gene_to_projects) % 50 == 0:
            self.print_out(gene_to_projects, gene_to_families, gene_to_variants, gene_to_variant_tags, gene_to_variant_and_families)

    self.print_out(gene_to_projects, gene_to_families, gene_to_variants, gene_to_variant_tags, gene_to_variant_and_families)
def update_pop_freqs_in_family_tables(self):
    """Rewrite the ``db_freqs.*`` fields of every variant in every family collection.

    Work is tracked in a local SQLite file
    (``reference_populations_family_tables.db``) holding one row per
    (project_id, family_id) with ``started`` / ``finished`` flags. Rows already
    marked as started are not picked up again, including on a later run
    against the same file.
    """
    # Load family tables
    population_frequency_store = mall.get_annotator().get_population_frequency_store()

    # isolation_level=None: sqlite3 autocommit mode, each statement is persisted immediately
    db = sqlite3.connect("reference_populations_family_tables.db", isolation_level=None)
    try:
        db.execute("CREATE TABLE if not exists all_projects(project_id varchar(200), family_id varchar(200), started bool, finished bool)")
        db.execute("CREATE UNIQUE INDEX IF NOT EXISTS all_projects_idx ON all_projects(project_id, family_id)")

        # Register every (project, family) pair; INSERT OR IGNORE leaves
        # existing rows (and their flags) untouched.
        for project in Project.objects.all().order_by('-last_accessed_date'):
            project_id = project.project_id
            datastore = get_datastore(project_id)
            for family_info in datastore._get_family_info(project_id):
                family_id = family_info['family_id']
                db.execute("INSERT OR IGNORE INTO all_projects VALUES (?, ?, 0, 0)", (project_id, family_id))

        population_slugs_to_load = [population_spec['slug'] for population_spec in annotator_settings.reference_populations_to_load]

        while True:
            # Not-yet-started rows in random order; the first one is processed.
            remaining_work = list(db.execute("SELECT project_id, family_id FROM all_projects WHERE started=0 ORDER BY RANDOM()"))
            print("%d projects / families remaining" % len(remaining_work))
            if not remaining_work:
                print("Done with all projects/families")
                break

            project_id, family_id = remaining_work[0]
            datastore = get_datastore(project_id)
            print(" updating %s / %s" % (project_id, family_id))
            db.execute("UPDATE all_projects SET started=1 WHERE project_id=? AND family_id=?", (project_id, family_id))

            family_collection = datastore._get_family_collection(project_id, family_id)
            for variant_dict in family_collection.find():
                freqs = population_frequency_store.get_frequencies(variant_dict['xpos'], variant_dict['ref'], variant_dict['alt'])
                # populations missing from the store are written as frequency 0
                full_freqs = {'db_freqs.'+population_slug: freqs.get(population_slug, 0) for population_slug in population_slugs_to_load}
                family_collection.update(
                    {'xpos': variant_dict['xpos'], 'ref': variant_dict['ref'], 'alt': variant_dict['alt']},
                    {'$set': full_freqs},
                    upsert=False)

            print(" ---> done updating project_id: %s, family_id: %s" % (project_id, family_id))
            db.execute("UPDATE all_projects SET finished=1 WHERE project_id=? AND family_id=?", (project_id, family_id))
    finally:
        # release the SQLite file handle even if a datastore call fails mid-run
        db.close()
def handle(self, *args, **options):
    """Print the families or individuals known to the datastore, tab-separated.

    ``args[0]`` selects what is listed: 'families' prints project id, family
    id and a comma-separated list of the family's individuals; 'individuals'
    (the default) prints project id and individual id. Any other value prints
    nothing.
    """
    # default display is individuals
    if len(args) > 0:
        display = args[0]
    else:
        display = 'individuals'

    if display == 'families':
        datastore = get_datastore()
        for project_id, family_id in datastore.get_all_families():
            fields = [
                project_id,
                family_id,
                ",".join(datastore.get_individuals_for_family(project_id, family_id))
            ]
            # single parenthesized argument: same output under the Python 2
            # print statement and the Python 3 print function
            print("\t".join(fields))

    elif display == 'individuals':
        for project_id, indiv_id in get_datastore().get_all_individuals():
            print("\t".join([project_id, indiv_id]))
def handle(self, *args, **options):
    """List datastore families or individuals as tab-separated lines.

    The first positional argument chooses the listing: 'families' prints
    project id, family id and the family's individuals joined by commas;
    'individuals' (used when no argument is given) prints project id and
    individual id. Unrecognized values produce no output.
    """
    # default display is individuals
    display = args[0] if len(args) > 0 else 'individuals'

    if display == 'families':
        datastore = get_datastore()
        for project_id, family_id in datastore.get_all_families():
            individuals = datastore.get_individuals_for_family(project_id, family_id)
            # single parenthesized argument: same output under the Python 2
            # print statement and the Python 3 print function
            print("\t".join([project_id, family_id, ",".join(individuals)]))

    elif display == 'individuals':
        for project_id, indiv_id in get_datastore().get_all_individuals():
            print("\t".join([project_id, indiv_id]))
def edit_family_cause(request, project_id, family_id):
    """View for editing the causal variants of a family.

    GET renders 'family/edit_cause.html'; a `variant` query parameter of the
    form "xpos|ref|alt" is first recorded as a CausalVariant for the family.
    A valid POST replaces the family's causal variants with the posted
    `variants` list, marks the family with analysis_status 'S' and the chosen
    inheritance mode, and redirects to 'family_home'.

    Raises:
        PermissionDenied: if the user cannot admin the project.
    """
    error = None

    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_admin(request.user):
        raise PermissionDenied

    if request.GET.get('variant'):
        xpos, ref, alt = request.GET['variant'].split('|')
        CausalVariant.objects.get_or_create(
            family=family,
            xpos=int(xpos),
            ref=ref,
            alt=alt,
        )

    # Queried once, after the optional get_or_create above, so a variant added
    # through ?variant= is part of the list.
    causal_variants = list(CausalVariant.objects.filter(family=family))

    if request.method == 'POST':
        form = EditFamilyCauseForm(family, request.POST)
        if form.is_valid():
            CausalVariant.objects.filter(family=family).delete()
            for v_str in request.POST.getlist('variants'):
                xpos, ref, alt = v_str.split('|')
                CausalVariant.objects.create(
                    family=family,
                    xpos=int(xpos),
                    ref=ref,
                    alt=alt,
                )
            update_xbrowse_model(
                family,
                analysis_status='S',
                causal_inheritance_mode=form.cleaned_data['inheritance_mode'])

            return redirect('family_home', project_id=project.project_id, family_id=family.family_id)
        else:
            error = server_utils.form_error_string(form)
    else:
        # NOTE(review): the POST branch uses EditFamilyCauseForm; EditFamilyForm
        # here looks like it may be unintended - confirm before changing.
        form = EditFamilyForm(family)

    datastore = get_datastore(project)
    variants = [
        datastore.get_single_variant(project_id, family_id, c.xpos, c.ref, c.alt)
        for c in causal_variants
    ]

    return render(request, 'family/edit_cause.html', {
        'project': project,
        'family': family,
        'error': error,
        'form': form,
        'variants': [v.toJSON() for v in variants],
    })
def add_family_search_flag(request):
    """Save a FamilySearchFlag for a variant and return that variant as JSON.

    All inputs come from request.GET: project_id, family_id, xpos, ref, alt,
    note, flag_type and flag_inheritance_mode are all required.

    Returns:
        JSONResponse with {'is_error': False, 'variant': ...} on success, or
        {'is_error': True, 'error': ...} when a required parameter is missing.

    Raises:
        PermissionDenied: if the user cannot edit the project.
    """
    # TODO: this view not like the others - refactor to forms
    error = None
    for key in ['project_id', 'family_id', 'xpos', 'ref', 'alt', 'note', 'flag_type', 'flag_inheritance_mode']:
        if request.GET.get(key) is None:
            # No break: as before, the last missing key is the one reported.
            error = "%s is requred" % key

    if error:
        return JSONResponse({
            'is_error': True,
            'error': error,
        })

    project = get_object_or_404(Project, project_id=request.GET.get('project_id'))
    family = get_object_or_404(Family, project=project, family_id=request.GET.get('family_id'))
    if not project.can_edit(request.user):
        # Must be raised: returning the exception class is not a valid response.
        raise PermissionDenied

    xpos = int(request.GET['xpos'])
    ref = request.GET.get('ref')
    alt = request.GET['alt']

    # todo: more validation - is variant valid?
    flag = FamilySearchFlag(
        user=request.user,
        family=family,
        xpos=xpos,
        ref=ref,
        alt=alt,
        note=request.GET.get('note'),
        flag_type=request.GET.get('flag_type'),
        suggested_inheritance=request.GET.get('flag_inheritance_mode'),
        date_saved=datetime.datetime.now(),
    )
    flag.save()

    variant = get_datastore(project.project_id).get_single_variant(
        family.project.project_id, family.family_id, xpos, ref, alt)
    api_utils.add_extra_info_to_variant(get_reference(), family, variant)

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
def handle(self, *args, **options):
    """Delete and reload variant data for the given families of a project.

    options["project_id"] selects the project; the positional args are the
    family ids to reload. Families not listed in args are skipped (with no
    args, nothing is deleted). For every VCF of the project the selected
    families are passed to xbrowse_controls.load_variants_for_family_list.
    """
    project_id = options["project_id"]
    family_ids = args
    project = Project.objects.get(project_id=project_id)

    # Family ids whose datastore entry has already been deleted in this run.
    # A family can appear under more than one VCF; deleting it again on a later
    # VCF would wipe the data just reloaded from an earlier one.
    already_deleted_once = set()

    for vcf_file, families in project.families_by_vcf().items():
        families_to_load = []
        for family in families:
            family_id = family.family_id
            print("Checking id: " + family_id)
            if not family_ids or family_id not in family_ids:
                continue

            # delete this family (first encounter only)
            if family_id not in already_deleted_once:
                get_datastore(project_id).delete_family(project_id, family_id)
                already_deleted_once.add(family_id)

            families_to_load.append(family)

        # reload family
        print("Loading %(project_id)s %(families_to_load)s" % locals())
        xbrowse_controls.load_variants_for_family_list(project, families_to_load, vcf_file)
def get_causal_variants_for_project(project):
    """Return the datastore variants for every CausalVariant in the project.

    Causal variants the datastore cannot find are left out. Each returned
    variant gets 'family_id' and 'project_id' extras.
    """
    causal_keys = [
        (cv.xpos, cv.ref, cv.alt, cv.family.family_id)
        for cv in CausalVariant.objects.filter(family__project=project)
    ]

    found = []
    for xpos, ref, alt, family_id in causal_keys:
        hit = get_datastore(project.project_id).get_single_variant(
            project.project_id, family_id, xpos, ref, alt)
        if not hit:
            continue
        hit.set_extra("family_id", family_id)
        hit.set_extra("project_id", project.project_id)
        found.append(hit)
    return found
def get_variants_from_note_tuples(project, note_tuples):
    """Resolve note tuples to variant objects.

    Each tuple is read as (xpos, ref, alt, family_id) by index. A variant the
    datastore does not have is built from its coordinates and annotated with
    the project's reference populations. Every variant gets 'family_id' and
    'project_id' extras.
    """
    resolved = []
    for note_t in note_tuples:
        xpos, ref, alt = note_t[0], note_t[1], note_t[2]
        family_id = note_t[3]

        variant = get_datastore(project.project_id).get_single_variant(
            project.project_id, family_id, xpos, ref, alt)
        if not variant:
            # Not in the datastore: fall back to a freshly annotated Variant.
            variant = Variant(xpos, ref, alt)
            get_annotator().annotate_variant(variant, project.get_reference_population_slugs())

        variant.set_extra("family_id", family_id)
        variant.set_extra("project_id", project.project_id)
        resolved.append(variant)
    return resolved
def get_variants_from_variant_tuples(project, variant_tuples):
    """Resolve (xpos, ref, alt, family_id) tuples to variant objects.

    A variant missing from the datastore is created from its coordinates and
    annotated with the project's reference populations. Every variant gets
    'family_id' and 'project_id' extras.
    """
    resolved = []
    for t in variant_tuples:
        xpos, ref, alt = t[0], t[1], t[2]
        family_id = t[3]

        variant = get_datastore(project.project_id).get_single_variant(
            project.project_id, family_id, xpos, ref, alt)
        if not variant:
            variant = Variant(xpos, ref, alt)
            get_annotator().annotate_variant(
                variant, project.get_reference_population_slugs())

        variant.set_extra('family_id', family_id)
        variant.set_extra('project_id', project.project_id)
        resolved.append(variant)
    return resolved
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """Generate (family, variants) pairs for a project / inheritance mode.

    This is a generator: one (family, list of variants) tuple is yielded per
    family of the project. A family whose search raises ValueError is
    reported on stdout and skipped.
    """
    # Search specification. It could in principle differ per project if the
    # reference populations differ.
    variant_filter = get_default_variant_filter('moderate_impact')
    freq_thresholds = (
        ('1kg_wgs_phase3', g1k_freq_threshold),
        ('1kg_wgs_phase3_popmax', g1k_popmax_freq_threshold),
        ('exac_v3', exac_freq_threshold),
        ('exac_v3_popmax', exac_popmax_threshold),
        ('merck-wgs-3793', merck_wgs_3793_threshold),
    )
    for population_and_threshold in freq_thresholds:
        variant_filter.ref_freqs.append(population_and_threshold)

    quality_filter = {
        'min_gq': GQ_threshold,
        'min_ab': AB_threshold,
    }

    # Run the search family by family.
    families = project.get_families()
    for position, family in enumerate(families, 1):
        print("Processing %s - family %s (%d / %d)" % (inheritance_mode, family.family_id, position, len(families)))
        try:
            if inheritance_mode != "all_variants":
                found = get_variants_with_inheritance_mode(
                    get_mall(project.project_id),
                    family.xfamily(),
                    inheritance_mode,
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                )
            else:
                found = get_variants(
                    get_datastore(project.project_id),
                    family.xfamily(),
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                    indivs_to_consider=family.indiv_id_list(),
                )
            yield family, list(found)
        except ValueError as e:
            print("Error: %s. Skipping family %s" % (str(e), str(family)))
def saved_variant_transcripts(request, variant_guid):
    """Return the transcripts of a saved variant as a JSON response.

    The response maps variant_guid to {'transcripts': ...}, built from the
    annotation of the variant loaded from the datastore. View permission on
    the saved variant's project is checked through check_permissions.
    """
    saved = SavedVariant.objects.get(guid=variant_guid)
    check_permissions(saved.project, request.user, CAN_VIEW)

    # TODO when variant search is rewritten for seqr models use that here
    xbrowse_project = find_matching_xbrowse_model(saved.project)
    datastore = get_datastore(xbrowse_project)
    loaded = datastore.get_single_variant(
        xbrowse_project.project_id,
        saved.family.family_id,
        saved.xpos,
        saved.ref,
        saved.alt,
    )

    transcripts = _variant_transcripts(loaded.annotation)
    return create_json_response({variant_guid: {'transcripts': transcripts}})
def get_causal_variants_for_project(project):
    """Collect the datastore variants behind the project's CausalVariant rows.

    Rows with no matching datastore variant are skipped; each variant found
    is tagged with 'family_id' and 'project_id' extras.
    """
    causal_keys = [
        (row.xpos, row.ref, row.alt, row.family.family_id)
        for row in CausalVariant.objects.filter(family__project=project)
    ]

    collected = []
    for xpos, ref, alt, family_id in causal_keys:
        match = get_datastore(project.project_id).get_single_variant(
            project.project_id, family_id, xpos, ref, alt)
        if not match:
            continue
        match.set_extra('family_id', family_id)
        match.set_extra('project_id', project.project_id)
        collected.append(match)
    return collected
def cohort_gene_search_variants(request):
    """Return the variants in one gene that are relevant to a cohort, as JSON.

    Project and cohort are resolved from request.GET; the remaining inputs
    (gene_id, inheritance_mode, variant_filter, quality_filter) are validated
    through CohortGeneSearchVariantsForm.

    Returns:
        JSONResponse with {'is_error': False, 'variants': [...], 'gene_info': ...}
        or {'is_error': True, 'error': ...} when the form is invalid.

    Raises:
        PermissionDenied: if the user cannot view the project.
    """
    # TODO: this view not like the others - refactor to forms
    project, cohort = get_project_and_cohort_for_user(request.user, request.GET)
    if not project.can_view(request.user):
        # Must be raised: returning the exception class is not a valid response.
        raise PermissionDenied

    form = api_forms.CohortGeneSearchVariantsForm(request.GET)
    if not form.is_valid():
        # Branch on validity itself so an invalid form can never fall through
        # to the search with undefined parameters.
        return JSONResponse({
            'is_error': True,
            'error': server_utils.form_error_string(form),
        })

    gene_id = form.cleaned_data['gene_id']
    inheritance_mode = form.cleaned_data['inheritance_mode']
    variant_filter = form.cleaned_data['variant_filter']
    quality_filter = form.cleaned_data['quality_filter']

    _, gene_variation = cohort_search.get_individuals_with_inheritance_in_gene(
        get_datastore(project.project_id),
        get_reference(),
        cohort.xcohort(),
        inheritance_mode,
        gene_id,
        variant_filter=variant_filter,
        quality_filter=quality_filter,
    )

    relevant_variants = gene_variation.get_relevant_variants_for_indiv_ids(cohort.indiv_id_list())
    api_utils.add_extra_info_to_variants_family(get_reference(), cohort, relevant_variants)

    return JSONResponse({
        'is_error': False,
        'variants': [v.toJSON() for v in relevant_variants],
        'gene_info': get_reference().get_gene(gene_id),
    })
def export_project_variants(request, project_id):
    """Export all tagged variants of a project.

    Args:
        request: Django request object
        project_id: id of the project to export

    Returns:
        A JSONResponse holding one entry per variant tag with keys
        "variant", "tag", "description", "family" and "family_status".
        For a tag with no family, "family" is None and "family_status" is ''.
        A variant not found in the datastore is represented by its
        xpos / ref / alt only.

    Raises:
        PermissionDenied: if the user cannot view the project.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    status_description_map = {
        abbrev: details[0] for abbrev, details in ANALYSIS_STATUS_CHOICES
    }

    datastore = get_datastore(project.project_id)
    variants = []
    for project_tag in ProjectTag.objects.filter(project__project_id=project_id):
        for variant_tag in VariantTag.objects.filter(project_tag=project_tag):
            family = variant_tag.family

            variant = datastore.get_single_variant(
                project.project_id,
                family.family_id if family else '',
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )
            if variant is not None:
                variant_json = variant.toJSON()
            else:
                variant_json = {
                    'xpos': variant_tag.xpos,
                    'ref': variant_tag.ref,
                    'alt': variant_tag.alt,
                }

            # Tags may have no family; guard both family-derived fields so
            # such a tag no longer raises AttributeError on toJSON().
            if family:
                family_json = family.toJSON()
                family_status = status_description_map.get(family.analysis_status, 'unknown')
            else:
                family_json = None
                family_status = ''

            variants.append({
                "variant": variant_json,
                "tag": project_tag.tag,
                "description": project_tag.title,
                "family": family_json,
                "family_status": family_status,
            })
    return JSONResponse(variants)
def cohort_gene_search_variants(request):
    """Return the variants in one gene that are relevant to a cohort, as JSON.

    Project and cohort come from request.GET. Users without view permission
    get a plain 'unauthorized' HttpResponse. Form errors are reported as
    {'is_error': True, 'error': ...}; otherwise the response carries
    'variants' and 'gene_info'.
    """
    # TODO: this view not like the others - refactor to forms
    project, cohort = get_project_and_cohort_for_user(request.user, request.GET)
    if not project.can_view(request.user):
        return HttpResponse('unauthorized')

    error = None
    form = api_forms.CohortGeneSearchVariantsForm(request.GET)
    if not form.is_valid():
        error = server_utils.form_error_string(form)
    else:
        cleaned = form.cleaned_data
        gene_id = cleaned['gene_id']
        inheritance_mode = cleaned['inheritance_mode']
        variant_filter = cleaned['variant_filter']
        quality_filter = cleaned['quality_filter']

    if error:
        return JSONResponse({'is_error': True, 'error': error})

    search_result = cohort_search.get_individuals_with_inheritance_in_gene(
        get_datastore(project.project_id),
        get_reference(),
        cohort.xcohort(),
        inheritance_mode,
        gene_id,
        variant_filter=variant_filter,
        quality_filter=quality_filter)
    _, gene_variation = search_result

    relevant_variants = gene_variation.get_relevant_variants_for_indiv_ids(
        cohort.indiv_id_list())
    api_utils.add_extra_info_to_variants_family(get_reference(), cohort,
                                                relevant_variants)

    variants_json = [v.toJSON() for v in relevant_variants]
    return JSONResponse({
        'is_error': False,
        'variants': variants_json,
        'gene_info': get_reference().get_gene(gene_id),
    })
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """Yield (family, variants) for each family of the project.

    Generator: for every family a (family, list of variants) tuple is
    produced, using either a plain variant search ("all_variants") or an
    inheritance-mode search. Families whose search raises ValueError are
    printed and skipped.
    """
    # create search specification
    # this could theoretically differ by project, if there are different reference populations
    variant_filter = get_default_variant_filter('moderate_impact')
    for slug, threshold in (
        ('1kg_wgs_phase3', g1k_freq_threshold),
        ('1kg_wgs_phase3_popmax', g1k_popmax_freq_threshold),
        ('exac_v3', exac_freq_threshold),
        ('exac_v3_popmax', exac_popmax_threshold),
        ('merck-wgs-3793', merck_wgs_3793_threshold),
    ):
        variant_filter.ref_freqs.append((slug, threshold))

    quality_filter = {'min_gq': GQ_threshold, 'min_ab': AB_threshold}

    # run the search for each family, collect results
    families = project.get_families()
    for i, family in enumerate(families):
        print("Processing %s - family %s (%d / %d)" % (inheritance_mode, family.family_id, i + 1, len(families)))
        try:
            if inheritance_mode == "all_variants":
                variant_stream = get_variants(
                    get_datastore(project.project_id),
                    family.xfamily(),
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                    indivs_to_consider=family.indiv_id_list(),
                )
            else:
                variant_stream = get_variants_with_inheritance_mode(
                    get_mall(project.project_id),
                    family.xfamily(),
                    inheritance_mode,
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                )
            yield family, list(variant_stream)
        except ValueError as e:
            print("Error: %s. Skipping family %s" % (str(e), str(family)))
def gather_all_annotated_genes_in_seqr():
    """Find all genes hit by tagged variants.

    Args:
        No arguments

    Returns:
        A defaultdict(set) keyed by the named tuple Key(gene_id, gene_name),
        where gene_name is the symbol the reference returns for gene_id.
        Each value is the set of lower-cased project ids in which a tagged
        variant hits that gene. Tags whose variant is not in the datastore,
        or whose variant has no gene_ids, contribute nothing.
    """
    gene_to_projects = defaultdict(set)
    Key = namedtuple('Key', 'gene_id, gene_name')

    for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
        project_id = variant_tag.project_tag.project.project_id
        variant = get_datastore(project_id).get_single_variant(
            project_id,
            variant_tag.family.family_id,
            variant_tag.xpos,
            variant_tag.ref,
            variant_tag.alt,
        )
        if variant is None or variant.gene_ids is None:
            continue

        for gene_id in variant.gene_ids:
            gene_name = get_reference().get_gene_symbol(gene_id)
            gene_to_projects[Key(gene_id, gene_name)].add(project_id.lower())

    return gene_to_projects
def get_variants_from_variant_tuples(project, variant_tuples):
    """Turn (xpos, ref, alt, family_id) tuples into variant objects.

    The datastore is consulted first. When it has no such variant, a new
    Variant is created and annotated with the project's reference
    populations. 'family_id' and 'project_id' extras are set on every result.
    """
    def _lookup_or_build(xpos, ref, alt, family_id):
        # Look the variant up in the datastore, or build and annotate a new one.
        found = get_datastore(project.project_id).get_single_variant(
            project.project_id, family_id, xpos, ref, alt
        )
        if found:
            return found
        built = Variant(xpos, ref, alt)
        get_annotator().annotate_variant(built, project.get_reference_population_slugs())
        return built

    variants = []
    for t in variant_tuples:
        variant = _lookup_or_build(t[0], t[1], t[2], t[3])
        variant.set_extra('family_id', t[3])
        variant.set_extra('project_id', project.project_id)
        variants.append(variant)
    return variants
def gather_all_annotated_genes_in_seqr():
    """Find all genes hit by tagged variants.

    Args:
        No arguments

    Returns:
        A defaultdict(set) keyed by the named tuple Key(gene_id, gene_name),
        gene_name being the symbol the reference returns for gene_id. Each
        value is the set of lower-cased project ids where a tagged variant
        hits that gene. Tags whose variant is missing from the datastore, or
        has no gene_ids, are skipped.
    """
    gene_to_projects = defaultdict(set)
    Key = namedtuple('Key', 'gene_id, gene_name')

    for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
        project = variant_tag.project_tag.project
        project_id = project.project_id
        variant = get_datastore(project).get_single_variant(
            project_id,
            variant_tag.family.family_id,
            variant_tag.xpos,
            variant_tag.ref,
            variant_tag.alt,
        )
        if variant is None or variant.gene_ids is None:
            continue

        for gene_id in variant.gene_ids:
            gene_name = get_reference().get_gene_symbol(gene_id)
            gene_to_projects[Key(gene_id, gene_name)].add(project_id.lower())

    return gene_to_projects
def add_variant_note(request):
    """Create a VariantNote from request.GET and return the noted variant as JSON.

    The project (and the family, when 'family_id' is in request.GET) is
    resolved for the requesting user. Form errors are returned as
    {'is_error': True, 'error': ...}.

    NOTE(review): the variant lookup reads family.family_id unconditionally,
    so a request without 'family_id' appears to fail after the note has been
    created - confirm with callers.
    """
    if 'family_id' in request.GET:
        project, family = get_project_and_family_for_user(
            request.user, request.GET)
    else:
        family = None
        project = utils.get_project_for_user(request.user, request.GET)

    form = api_forms.VariantNoteForm(project, request.GET)
    if not form.is_valid():
        return JSONResponse({
            'is_error': True,
            'error': server_utils.form_error_string(form),
        })

    cleaned = form.cleaned_data
    note = VariantNote.objects.create(
        user=request.user,
        date_saved=datetime.datetime.now(),
        project=project,
        note=cleaned['note_text'],
        xpos=cleaned['xpos'],
        ref=cleaned['ref'],
        alt=cleaned['alt'],
    )
    if family:
        note.family = family
        note.save()

    datastore = get_datastore(project.project_id)
    variant = datastore.get_single_variant(
        project.project_id,
        family.family_id,
        cleaned['xpos'],
        cleaned['ref'],
        cleaned['alt'],
    )
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
def add_variant_note(request):
    """Store a note on a variant and respond with the variant as JSON.

    Inputs are taken from request.GET and validated by VariantNoteForm. On
    success the response is {'is_error': False, 'variant': ...}; on invalid
    input it is {'is_error': True, 'error': ...}.

    NOTE(review): family stays None when 'family_id' is absent, yet
    family.family_id is read for the variant lookup - verify that path.
    """
    family = None
    has_family = 'family_id' in request.GET
    if has_family:
        project, family = get_project_and_family_for_user(request.user, request.GET)
    else:
        project = utils.get_project_for_user(request.user, request.GET)

    form = api_forms.VariantNoteForm(project, request.GET)
    if form.is_valid():
        data = form.cleaned_data
        note = VariantNote.objects.create(
            user=request.user,
            date_saved=datetime.datetime.now(),
            project=project,
            note=data['note_text'],
            xpos=data['xpos'],
            ref=data['ref'],
            alt=data['alt'],
        )
        if family:
            note.family = family
            note.save()

        variant = get_datastore(project.project_id).get_single_variant(
            project.project_id, family.family_id, data['xpos'], data['ref'], data['alt'])
        add_extra_info_to_variants_family(get_reference(), family, [variant])
        payload = {'is_error': False, 'variant': variant.toJSON()}
    else:
        payload = {'is_error': True, 'error': server_utils.form_error_string(form)}

    return JSONResponse(payload)
def family_variant_annotation(request):
    """Return a single family variant as JSON.

    request.GET must carry project_id, family_id, xpos, ref and alt.

    Returns:
        JSONResponse with {'variant': ..., 'is_error': False} on success, or
        {'is_error': True, 'error': <message>} when a parameter is missing or
        the variant does not exist.

    Raises:
        PermissionDenied: if the user cannot view the project.
    """
    # TODO: this view not like the others - refactor to forms
    error = None
    for key in ['project_id', 'family_id', 'xpos', 'ref', 'alt']:
        if request.GET.get(key) is None:
            # Format with %, not a comma: the comma built a (template, key)
            # tuple instead of a message. The last missing key is reported.
            error = "%s is requred" % key

    if error:
        return JSONResponse({
            'is_error': True,
            'error': error,
        })

    project = get_object_or_404(Project, project_id=request.GET.get('project_id'))
    family = get_object_or_404(Family, project=project, family_id=request.GET.get('family_id'))
    if not project.can_view(request.user):
        # Must be raised: returning the exception class is not a valid response.
        raise PermissionDenied

    variant = get_datastore(project.project_id).get_single_variant(
        family.project.project_id,
        family.family_id,
        int(request.GET['xpos']),
        request.GET['ref'],
        request.GET['alt'],
    )
    if not variant:
        return JSONResponse({
            'is_error': True,
            'error': "Variant does not exist",
        })

    return JSONResponse({
        'variant': variant.toJSON(),
        'is_error': False,
    })
def edit_variant_tags(request):
    """Replace the tags of a variant and return the variant as JSON.

    Existing VariantTag rows for the (family, xpos, ref, alt) are deleted and
    one row is created per project tag in the validated form. Form errors are
    returned as {'is_error': True, 'error': ...}.

    NOTE(review): family.family_id is read even when 'family_id' was not
    supplied (family is None) - confirm that path with callers.
    """
    if 'family_id' in request.GET:
        project, family = get_project_and_family_for_user(
            request.user, request.GET)
    else:
        family = None
        project = utils.get_project_for_user(request.user, request.GET)

    form = api_forms.VariantTagsForm(project, request.GET)
    if not form.is_valid():
        return JSONResponse({
            'is_error': True,
            'error': server_utils.form_error_string(form),
        })

    cleaned = form.cleaned_data

    # Drop the current tags before writing the new selection.
    VariantTag.objects.filter(family=family,
                              xpos=cleaned['xpos'],
                              ref=cleaned['ref'],
                              alt=cleaned['alt']).delete()
    for project_tag in cleaned['project_tags']:
        VariantTag.objects.create(
            project_tag=project_tag,
            family=family,
            xpos=cleaned['xpos'],
            ref=cleaned['ref'],
            alt=cleaned['alt'],
        )

    datastore = get_datastore(project.project_id)
    variant = datastore.get_single_variant(
        project.project_id,
        family.family_id,
        cleaned['xpos'],
        cleaned['ref'],
        cleaned['alt'],
    )
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
def family_variant_view(request, project_id, family_id):
    """Render the page for a single variant of a family.

    The variant is identified by the xpos / ref / alt query parameters.
    Users without view permission get a plain 'unauthorized' response; a
    missing or non-integer xpos gets 'Invalid View'.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        return HttpResponse('unauthorized')

    try:
        xpos = int(request.GET.get('xpos'))
    except (TypeError, ValueError):
        # TypeError: xpos missing (int(None)); ValueError: xpos not numeric.
        # Narrowed from a bare except so unrelated errors are not swallowed.
        return HttpResponse('Invalid View')
    ref = request.GET.get('ref')
    alt = request.GET.get('alt')

    variant = get_datastore(project_id).get_single_variant(project_id, family_id, xpos, ref, alt)
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return render(request, 'family/family_variant_view.html', {
        'project': project,
        'family': family,
        'variant_json': json.dumps(variant.toJSON()),
    })
def edit_variant_tags(request):
    """Replace the tags of a variant and return the variant as JSON.

    All existing VariantTag rows for the (family, xpos, ref, alt) are removed
    and one new row, stamped with the requesting user and the current time,
    is created per selected project tag.

    NOTE(review): family remains None without 'family_id' in request.GET, but
    family.family_id is still read for the lookup - verify that path.
    """
    family = None
    has_family = 'family_id' in request.GET
    if has_family:
        project, family = get_project_and_family_for_user(request.user, request.GET)
    else:
        project = utils.get_project_for_user(request.user, request.GET)

    form = api_forms.VariantTagsForm(project, request.GET)
    if form.is_valid():
        data = form.cleaned_data
        VariantTag.objects.filter(
            family=family, xpos=data['xpos'], ref=data['ref'], alt=data['alt']
        ).delete()

        for project_tag in data['project_tags']:
            VariantTag.objects.create(
                user=request.user,
                date_saved=datetime.datetime.now(),
                project_tag=project_tag,
                family=family,
                xpos=data['xpos'],
                ref=data['ref'],
                alt=data['alt'],
            )

        variant = get_datastore(project.project_id).get_single_variant(
            project.project_id, family.family_id, data['xpos'], data['ref'], data['alt'])
        add_extra_info_to_variants_family(get_reference(), family, [variant])
        payload = {'is_error': False, 'variant': variant.toJSON()}
    else:
        payload = {'is_error': True, 'error': server_utils.form_error_string(form)}

    return JSONResponse(payload)
def transfer_project(self, from_project_id, destination_project_id):
    """Move the datastore records of one project under another project's id.

    Steps, in order:
      1. fail if the source project has individuals the destination lacks;
      2. attach the source project's VCF files to the destination project;
      3. in both the projects and the families collections, rename the
         destination records to '<destination>_previous1' (version '1') and
         the source records to the destination id (version '2');
      4. refresh family analysis status and ask whether to delete the
         source project (only an answer of 'Y' deletes it).
    """
    print("From: " + from_project_id)
    print("To: " + destination_project_id)

    from_project = Project.objects.get(project_id=from_project_id)
    destination_project = Project.objects.get(
        project_id=destination_project_id)

    # Make sure individuals are the same
    from_indiv_ids = {
        indiv.indiv_id for indiv in Individual.objects.filter(project=from_project)
    }
    dest_indiv_ids = {
        indiv.indiv_id for indiv in Individual.objects.filter(project=destination_project)
    }
    indivs_missing_from_dest_project = from_indiv_ids - dest_indiv_ids
    if indivs_missing_from_dest_project:
        raise Exception("Individuals missing from dest project: " +
                        str(indivs_missing_from_dest_project))

    # update VCFs
    for vcf_file_path in from_project.families_by_vcf().keys():
        vcf_file, _ = VCFFile.objects.get_or_create(
            file_path=os.path.abspath(vcf_file_path))
        sample_management.add_vcf_file_to_project(destination_project, vcf_file)
        print("Added %s to project %s" % (vcf_file, destination_project.project_id))

    families_db = get_datastore(from_project_id)._db
    projects_db = get_project_datastore(from_project_id)._db

    from_query = {'project_id': from_project_id}
    dest_query = {'project_id': destination_project_id}

    print("==========")
    print("Checking Projects:")
    check_that_exists(projects_db.projects, from_query, not_more_than_one=True)
    check_that_exists(projects_db.projects, dest_query, not_more_than_one=True)
    print("==========")
    print("Checking Families:")
    check_that_exists(families_db.families, from_query, not_more_than_one=False)
    check_that_exists(families_db.families, dest_query, not_more_than_one=False)

    print("==========")
    print("Make Updates:")
    # Projects first, then families; within each collection the destination
    # records are moved aside before the source records take over their id.
    for collection in (projects_db.projects, families_db.families):
        update(collection, {'project_id': destination_project_id}, {
            'project_id': destination_project_id + '_previous1',
            'version': '1',
        })
        update(collection, {'project_id': from_project_id}, {
            'project_id': destination_project_id,
            'version': '2',
        })

    print("==========")
    print("Checking Projects:")
    check_that_exists(projects_db.projects,
                      {'project_id': destination_project_id},
                      not_more_than_one=True)
    print("==========")
    print("Checking Families:")
    check_that_exists(families_db.families,
                      {'project_id': destination_project_id},
                      not_more_than_one=False)

    update_family_analysis_status(destination_project_id)

    print("Data transfer finished.")
    answer = raw_input("Delete the 'from' project: %s? [Y/n] " % from_project_id)
    if answer.strip() != 'Y':
        print("Project not deleted")
    else:
        sample_management.delete_project(from_project_id)
        print("Project %s deleted" % from_project_id)
def calculate_cohort_gene_search(cohort, search_spec):
    """Compute cohort gene search results for the parameters in search_spec.

    Meant to be called after the cache has been checked - all of the
    computation happens here.

    Returns:
        A list of gene dicts, one per gene with at least two hits. Genes the
        reference cannot resolve are left out. Genes whose bounds lookup
        raises KeyError get chr / start / end of None.
    """
    xcohort = cohort.xcohort()
    cohort_size = len(xcohort.individuals)
    indiv_id_list = xcohort.indiv_id_list()

    gene_stream = cohort_get_genes_with_inheritance(
        get_datastore(cohort.project.project_id),
        get_reference(),
        xcohort,
        search_spec.inheritance_mode,
        search_spec.variant_filter,
        search_spec.quality_filter,
    )

    genes = []
    for gene_id, indivs_with_inheritance, gene_variation in gene_stream:
        num_hits = len(indivs_with_inheritance)
        if num_hits < 2:
            # don't return genes with a single variant
            continue

        try:
            start_pos, end_pos = get_reference().get_gene_bounds(gene_id)
            chrom, start = genomeloc.get_chr_pos(start_pos)
            end = genomeloc.get_chr_pos(end_pos)[1]
        except KeyError:
            chrom = start = end = None

        # The project's own control cohort wins over the global default.
        control_cohort = cohort.project.default_control_cohort or settings.DEFAULT_CONTROL_COHORT
        control_comparison = population_controls.control_comparison(
            control_cohort,
            gene_id,
            num_hits,
            cohort_size,
            search_spec.inheritance_mode,
            search_spec.variant_filter,
            search_spec.quality_filter,
        )

        xgene = get_reference().get_gene(gene_id)
        if xgene is None:
            continue

        sys.stderr.write(" cohort_gene_search - found gene: %s, gene_id: %s \n" % (xgene['symbol'], gene_id))

        relevant = gene_variation.get_relevant_variants_for_indiv_ids(indiv_id_list)
        genes.append({
            'gene_info': xgene,
            'gene_id': gene_id,
            'gene_name': xgene['symbol'],
            'num_hits': num_hits,
            'num_unique_variants': len(relevant),
            'chr': chrom,
            'start': start,
            'end': end,
            'control_comparison': control_comparison,
        })

    sys.stderr.write(" cohort_gene_search - finished. (cohort_genes_with_inheritance iterator)")
    return genes
def transfer_project(self, from_project_id, to_project_id):
    """Copy a project's records into another project (created if missing).

    Transferred, in this order:
      - Project: description and private reference populations
      - Family: descriptive / analysis fields, plus an upserted datastore
        record that points at the source family's collection
      - FamilyGroup
      - Individual (per family), including links to VCF files
      - CausalVariant (per family)
      - VariantNote
      - ProjectTag and its VariantTags
      - ProjectGeneList

    Not transferred (disabled in the code): ProjectCollaborator,
    FamilyImageSlide, Cohort.

    Notes and tags whose family id has no counterpart in the destination
    project are printed and skipped.
    """
    # Field names copied verbatim from a source Family to its destination.
    family_fields = (
        'family_name',
        'short_description',
        'about_family_content',
        'analysis_summary_content',
        'coded_phenotype',
        'post_discovery_omim_number',
        'pedigree_image',
        'pedigree_image_height',
        'pedigree_image_width',
        'analysis_status',
        'analysis_status_date_saved',
        'analysis_status_saved_by',
        'causal_inheritance_mode',
        'internal_case_review_notes',
        'internal_case_review_brief_summary',
    )
    # Field names copied verbatim from a source Individual to its destination.
    individual_fields = (
        'created_date',
        'affected',
        'phenotips_id',
        'phenotips_data',
        'case_review_status',
        'mean_target_coverage',
        'coverage_status',
        'bam_file_path',
        'vcf_id',
        'gender',
        'in_case_review',
        'nickname',
        'maternal_id',
        'paternal_id',
        'other_notes',
    )

    families_db = get_datastore()._db

    # Project
    from_project = Project.objects.get(project_id=from_project_id)
    to_project, created = Project.objects.get_or_create(project_id=to_project_id)
    if created:
        print("Created project: " + str(to_project))
    to_project.description = from_project.description
    to_project.save()

    # Reference Populations
    for reference_population in from_project.private_reference_populations.all():
        print("Adding private reference population: " + reference_population.slug)
        to_project.private_reference_populations.add(reference_population)
        to_project.save()

    # Family
    to_family_id_to_family = {}  # maps family_id to the to_family object
    for from_f in Family.objects.filter(project=from_project):
        to_f, created = Family.objects.get_or_create(project=to_project, family_id=from_f.family_id)
        if not created:
            print("Matched family ids %s (%s) to %s (%s)" % (from_f.family_id, from_f.short_description, to_f.family_id, to_f.short_description))

        to_family_id_to_family[to_f.family_id] = to_f

        for field_name in family_fields:
            setattr(to_f, field_name, getattr(from_f, field_name))
        to_f.save()

        # The datastore record is keyed by the destination project but its
        # coll_name is built from the source project and family ids.
        family_selector = {
            'project_id': to_project.project_id,
            'family_id': to_f.family_id,
        }
        family_record = {
            "status": "loaded",
            "family_id": to_f.family_id,
            "individuals": [i.indiv_id for i in Individual.objects.filter(project=from_project, family=from_f)],
            "coll_name": "family_%s_%s" % (from_project.project_id, from_f.family_id),
            "project_id": to_project.project_id,
        }
        update(families_db.families, family_selector, family_record, upsert=True)

    # FamilyGroup
    for from_fg in FamilyGroup.objects.filter(project=from_project):
        FamilyGroup.objects.get_or_create(
            project=to_project,
            slug=from_fg.slug,
            name=from_fg.name,
            description=from_fg.description)

    # Individual and CausalVariant, family by family
    for from_family in Family.objects.filter(project=from_project):
        to_family = to_family_id_to_family[from_family.family_id]

        for from_i in Individual.objects.filter(project=from_project, family=from_family):
            to_i, created = Individual.objects.get_or_create(project=to_project, family=to_family, indiv_id=from_i.indiv_id)
            if not created:
                print("matched existing individual: " + str(from_i.indiv_id) + " in family " + from_family.family_id)

            for field_name in individual_fields:
                setattr(to_i, field_name, getattr(from_i, field_name))

            for vcf_file in from_i.vcf_files.all():
                if vcf_file not in to_i.vcf_files.all():
                    to_i.vcf_files.add(vcf_file)
            to_i.save()

        for from_v in CausalVariant.objects.filter(family=from_family):
            CausalVariant.objects.get_or_create(
                family=to_family,
                variant_type=from_v.variant_type,
                xpos=from_v.xpos,
                ref=from_v.ref,
                alt=from_v.alt)

    # VariantNote
    for from_vn in VariantNote.objects.filter(project=from_project):
        if from_vn.family.family_id not in to_family_id_to_family:
            print("Skipping note: " + str(from_vn.toJSON()))
            continue
        to_family = to_family_id_to_family[from_vn.family.family_id]
        VariantNote.objects.get_or_create(
            project=to_project,
            family=to_family,
            user=from_vn.user,
            date_saved=from_vn.date_saved,
            note=from_vn.note,
            xpos=from_vn.xpos,
            ref=from_vn.ref,
            alt=from_vn.alt)

    # ProjectTag and VariantTag
    for from_ptag in ProjectTag.objects.filter(project=from_project):
        to_ptag, created = ProjectTag.objects.get_or_create(
            project=to_project,
            tag=from_ptag.tag,
            title=from_ptag.title,
            color=from_ptag.color)
        for from_vtag in VariantTag.objects.filter(project_tag=from_ptag):
            if from_vtag.family.family_id not in to_family_id_to_family:
                print("Skipping tag: " + str(from_vtag.xpos))
                continue
            to_family = to_family_id_to_family[from_vtag.family.family_id]
            VariantTag.objects.get_or_create(
                family=to_family,
                project_tag=to_ptag,
                xpos=from_vtag.xpos,
                ref=from_vtag.ref,
                alt=from_vtag.alt)

    # ProjectGeneList
    for from_pgl in ProjectGeneList.objects.filter(project=from_project):
        ProjectGeneList.objects.get_or_create(project=to_project, gene_list=from_pgl.gene_list)
def calculate_mendelian_variant_search(search_spec, family, user=None):
    """Run the variant search described by ``search_spec`` on one family.

    The search function is chosen by ``search_spec.search_mode``, which must
    be one of 'standard_inheritance', 'custom_inheritance', 'gene_burden',
    'allele_count' or 'all_variants'.

    Args:
        search_spec: search specification; ``search_mode`` selects the
            branch, and the matching filter attributes (``variant_filter``,
            ``quality_filter``, ``inheritance_mode``, ...) are forwarded to
            the search function.
        family: family to search; must provide ``xfamily()``, ``project``
            and ``family_id``.
        user: optional user, passed through to the search functions.

    Returns:
        list of variants, each with its 'family_id' extra set to
        ``family.family_id``.

    Raises:
        ValueError: if ``search_spec.search_mode`` is not a supported mode.
    """
    xfamily = family.xfamily()
    project = family.project
    search_mode = search_spec.search_mode

    if search_mode == 'standard_inheritance':
        # NOTE: this mode is the only one that is handed get_mall(project)
        # rather than get_datastore(project).
        variants = list(get_variants_with_inheritance_mode(
            get_mall(project),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        ))

    elif search_mode == 'custom_inheritance':
        variants = list(get_variants_family(
            get_datastore(project),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        ))

    elif search_mode == 'gene_burden':
        gene_stream = get_genes_family(
            get_datastore(project),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        )
        # Flatten the per-gene results back into a plain list of variants.
        variants = list(stream_utils.gene_stream_to_variant_stream(gene_stream, get_reference()))

    elif search_mode == 'allele_count':
        variants = list(get_variants_allele_count(
            get_datastore(project),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        ))

    elif search_mode == 'all_variants':
        variants = list(get_variants_family(
            get_datastore(project),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            indivs_to_consider=xfamily.indiv_id_list(),
            user=user,
        ))

    else:
        # Previously an unknown mode left `variants` as None and crashed
        # below with "'NoneType' object is not iterable".
        raise ValueError("Unknown search mode: %s" % (search_mode,))

    for variant in variants:
        variant.set_extra('family_id', family.family_id)

    return variants
def add_dataset_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'ignoreMissingFamilyMembers': <presumably Boolean; forwarded to add_dataset>
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
            'mappingFilePath': <forwarded to add_dataset as mapping_file_path>
        }

        Response body - will contain the following structure:
        {
            'samplesByGuid': { <sampleGuid>: <sample json>, ... }
            'individualsByGuid': {   # only present when new samples were created
                <individualGuid>: { 'sampleGuids': [<sampleGuid>, ...] }, ...
            }
        }

        If loading the dataset or updating the legacy records raises, the
        response is {'errors': [<message>]} with HTTP status 400.

    Raises:
        ValueError: if 'sampleType' or 'datasetType' is missing from the
            request body (raised before the 400-producing error handler).
    """

    logger.info("add_dataset_handler: " + str(request))

    project = get_project_and_check_permissions(project_guid, request.user, permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    logger.info("add_dataset_handler: received %s" % pformat(request_json))

    required_fields = ['sampleType', 'datasetType']
    if any(field not in request_json for field in required_fields):
        raise ValueError("request must contain fields: {}".format(', '.join(required_fields)))

    sample_type = request_json['sampleType']
    dataset_type = request_json['datasetType']

    elasticsearch_index = request_json.get('elasticsearchIndex')
    if elasticsearch_index:
        elasticsearch_index = elasticsearch_index.strip()

    dataset_path = request_json.get('datasetPath')
    if dataset_path:
        dataset_path = dataset_path.strip()

    dataset_name = request_json.get('datasetName')
    if dataset_name:
        dataset_name = dataset_name.strip()

    ignore_extra_samples_in_callset = request_json.get('ignoreExtraSamplesInCallset')
    ignore_missing_family_members = request_json.get('ignoreMissingFamilyMembers')

    # `or {}` also covers an explicit `"mappingFile": null`, which a plain
    # .get('mappingFile', {}) would turn into None.get(...) -> AttributeError.
    mapping_file_id = (request_json.get('mappingFile') or {}).get('uploadedFileId')
    mapping_file_path = request_json.get('mappingFilePath')

    try:
        updated_samples, created_sample_ids = add_dataset(
            project=project,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            dataset_path=dataset_path,
            dataset_name=dataset_name,
            max_edit_distance=0,
            ignore_extra_samples_in_callset=ignore_extra_samples_in_callset,
            ignore_missing_family_members=ignore_missing_family_members,
            mapping_file_path=mapping_file_path,
            mapping_file_id=mapping_file_id,
        )

        # update VCFFile records
        if updated_samples:
            if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS:
                base_project = BaseProject.objects.get(seqr_project=project)
                get_datastore(base_project).bust_project_cache(base_project.project_id)
                clear_project_results_cache(base_project.project_id)

                # Reuse the most recently created matching VCFFile, if any.
                vcf_file = VCFFile.objects.filter(
                    project=base_project,
                    dataset_type=dataset_type,
                    sample_type=sample_type,
                    elasticsearch_index=elasticsearch_index).order_by('-pk').first()

                if not vcf_file:
                    vcf_file = VCFFile.objects.create(
                        project=base_project,
                        dataset_type=dataset_type,
                        sample_type=sample_type,
                        elasticsearch_index=elasticsearch_index,
                    )
                    logger.info("Created vcf file: " + str(vcf_file.__dict__))

                # legacy VCFFile model requires non-empty vcf path
                vcf_file.file_path = dataset_path or "{}.vcf.gz".format(elasticsearch_index)
                # next(iter(...)) works on both Python 2.6+ and 3, unlike
                # the Python-2-only iterator.next() method.
                vcf_file.loaded_date = next(iter(updated_samples)).loaded_date
                vcf_file.save()

                for indiv in [s.individual for s in updated_samples]:
                    for base_indiv in BaseIndividual.objects.filter(seqr_individual=indiv).only('id'):
                        base_indiv.vcf_files.add(vcf_file)

            elif dataset_type == Sample.DATASET_TYPE_READ_ALIGNMENTS:
                for sample in updated_samples:
                    for base_indiv in BaseIndividual.objects.filter(seqr_individual=sample.individual).only('id'):
                        base_indiv.bam_file_path = sample.dataset_file_path
                        base_indiv.save()

        updated_sample_json = get_json_for_samples(updated_samples, project_guid=project_guid)
        response = {
            'samplesByGuid': {s['sampleGuid']: s for s in updated_sample_json}
        }

        # Only individuals that gained a newly created sample are reported.
        updated_individuals = {
            s['individualGuid'] for s in updated_sample_json if s['sampleId'] in created_sample_ids
        }
        if updated_individuals:
            individuals = Individual.objects.filter(
                guid__in=updated_individuals).prefetch_related('sample_set', 'family').only('guid')
            response['individualsByGuid'] = {
                ind.guid: {'sampleGuids': [s.guid for s in ind.sample_set.only('guid').all()]}
                for ind in individuals
            }

            # Families that were waiting for data now have some: advance them.
            for ind in individuals:
                family = ind.family
                if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA:
                    update_seqr_model(family, analysis_status=Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

        return create_json_response(response)
    except Exception as e:
        traceback.print_exc()
        # Exception.message only exists on Python 2; fall back to str(e)
        # when it is missing or empty.
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]}, status=400)
def x_variant(self):
    """Look up this record's variant in its project's datastore.

    The variant is identified by the family's project id, the family id,
    and this object's ``xpos`` / ``ref`` / ``alt``.

    Returns:
        whatever ``get_single_variant`` returns for that key.
    """
    family = self.family
    project_id = family.project.project_id
    datastore = get_datastore(project_id)
    return datastore.get_single_variant(
        project_id,
        family.family_id,
        self.xpos,
        self.ref,
        self.alt,
    )