def get_variants_in_gene(self, project_id, family_id, gene_id, genotype_filter=None, variant_filter=None):
    """Yield the variants of one family that match ``gene_id``.

    The caller's ``variant_filter`` is never modified: a deep copy (or a new
    ``VariantFilter`` when none is given) is restricted to ``gene_id`` and
    used both to build the database query and to re-check every variant
    after annotations have been added.

    Yields nothing when ``_get_family_collection`` returns ``None``.
    Variants are yielded in ``unique_tuple()`` order.
    """
    if variant_filter is None:
        modified_variant_filter = VariantFilter()
    else:
        modified_variant_filter = copy.deepcopy(variant_filter)
    modified_variant_filter.add_gene(gene_id)

    db_query = self._make_db_query(genotype_filter, modified_variant_filter)
    collection = self._get_family_collection(project_id, family_id)
    # Compare with None explicitly: recent PyMongo collection objects refuse
    # truth-value testing (``not collection`` raises NotImplementedError).
    if collection is None:
        return

    # we have to collect list in memory here because mongo can't sort on xpos,
    # as result size can get too big.
    # need to find a better way to do this.
    variants = []
    cursor = collection.find(db_query).hint([
        ('db_gene_ids', pymongo.ASCENDING),
        ('xpos', pymongo.ASCENDING),
    ])
    for variant_dict in cursor:
        variant = Variant.fromJSON(variant_dict)
        self.add_annotations_to_variant(variant, project_id)
        if passes_variant_filter(variant, modified_variant_filter):
            variants.append(variant)

    variants.sort(key=lambda v: v.unique_tuple())
    for v in variants:
        yield v
def get_project_variants_in_gene(self, project_id, gene_id, variant_filter=None):
    """Return the project's variants matching ``gene_id``, sorted by ``unique_tuple()``.

    A copy of ``variant_filter`` (or a new ``VariantFilter``) is narrowed to
    ``gene_id``; the caller's filter object is left untouched. The generated
    query is written to stderr. Every record returned by the query is turned
    into a ``Variant``, annotated, and kept only if it still passes the
    narrowed filter.
    """
    gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    db_query = self._make_db_query(None, gene_filter)
    sys.stderr.write("".join([
        "Project Gene Search: ", str(project_id),
        " all variants query: ", str(db_query),
    ]))
    collection = self._get_project_collection(project_id)

    # The whole result set is held in memory because mongo can't sort on
    # xpos once the result gets too big; a better approach is still needed.
    index_hint = [('db_gene_ids', pymongo.ASCENDING), ('xpos', pymongo.ASCENDING)]
    matches = []
    for record in collection.find(db_query).hint(index_hint):
        candidate = Variant.fromJSON(record)
        self.add_annotations_to_variant(candidate, project_id)
        if not passes_variant_filter(candidate, gene_filter):
            continue
        matches.append(candidate)

    matches.sort(key=lambda v: v.unique_tuple())
    return matches
def get_variants_by_family_for_gene(mall, family_list, inheritance_mode, gene_id, variant_filter=None, quality_filter=None, user=None):
    """Collect, per family, the variants in ``gene_id`` matching an inheritance mode.

    Returns a dict keyed by ``(project_id, family_id)`` whose values are the
    lists produced by ``get_variants_with_inheritance_mode`` for that family.

    The filter handed in by the caller is deep-copied before ``gene_id`` is
    added to it, so repeated calls with the same filter object do not
    accumulate genes on it.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    if variant_filter is None:
        gene_filter = VariantFilter()
    else:
        gene_filter = copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    by_family = {}
    for family in family_list:
        family_t = (family.project_id, family.family_id)
        by_family[family_t] = list(
            get_variants_with_inheritance_mode(
                mall,
                family,
                inheritance_mode,
                gene_filter,
                quality_filter,
                user=user,
            )
        )
    return by_family
def get_variants_in_gene(self, project_id, family_id, gene_id, genotype_filter=None, variant_filter=None):
    """Unimplemented gene lookup for this store.

    A gene-restricted copy of ``variant_filter`` is prepared (the caller's
    object is not modified), after which the method always raises
    ``ValueError("Not Implemented")``.
    """
    gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)
    # No query is built from gene_filter yet; the lookup itself is missing.
    raise ValueError("Not Implemented")
def get_project_variants_in_gene(self, project_id, gene_id, variant_filter=None, user=None):
    """Return a list of the project's variants matching ``gene_id``.

    A copy of ``variant_filter`` (or a new ``VariantFilter``) is narrowed to
    ``gene_id`` and passed to ``get_elasticsearch_variants`` together with
    ``user`` and ``max_results_limit=9999``; whatever that call yields is
    returned as a list, in the same order.
    """
    gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    results = self.get_elasticsearch_variants(
        project_id,
        variant_filter=gene_filter,
        user=user,
        max_results_limit=9999,
    )
    return list(results)
def get_variants_in_gene(self, project_id, gene_id, variant_filter=None):
    """Return the project's variants matching ``gene_id``, sorted by ``unique_tuple()``.

    The caller's ``variant_filter`` is copied before ``gene_id`` is added to
    it. Records returned by the query are converted to ``Variant`` objects
    and kept only when they pass the narrowed filter.
    """
    gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    db_query = self._make_db_query(None, gene_filter)
    collection = self._get_project_collection(project_id)

    cursor = collection.find(db_query).hint([
        ('gene_ids', pymongo.ASCENDING),
        ('xpos', pymongo.ASCENDING),
    ])
    # Each record is parsed and then checked before the next one is read.
    parsed = (Variant.fromJSON(record) for record in cursor)
    kept = [candidate for candidate in parsed if passes_variant_filter(candidate, gene_filter)]
    kept.sort(key=lambda v: v.unique_tuple())
    return kept
def get_project_variants_in_gene(self, project_id, gene_id, variant_filter=None):
    """Return the project's variants matching ``gene_id``, sorted by ``unique_tuple()``.

    A copy of ``variant_filter`` (or a new ``VariantFilter``) is narrowed to
    ``gene_id``; the caller's object is not modified. The query is logged at
    INFO level. All matching records are loaded first, annotated in a single
    ``add_annotations_to_variants`` call, and only then re-checked against
    the narrowed filter.
    """
    gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    db_query = self._make_db_query(None, gene_filter)
    log_message = "Project Gene Search: " + str(project_id) + " all variants query: " + str(db_query)
    logger.info(log_message)
    collection = self._get_project_collection(project_id)

    # The whole result set is held in memory because mongo can't sort on
    # xpos once the result gets too big; a better approach is still needed.
    cursor = collection.find(db_query).hint([
        ('db_gene_ids', pymongo.ASCENDING),
        ('xpos', pymongo.ASCENDING),
    ])
    loaded = []
    for record in cursor:
        loaded.append(Variant.fromJSON(record))

    self.add_annotations_to_variants(loaded, project_id)

    kept = [candidate for candidate in loaded if passes_variant_filter(candidate, gene_filter)]
    kept.sort(key=lambda v: v.unique_tuple())
    return kept
def get_variants_by_family_for_gene(mall, family_list, inheritance_mode, gene_id, variant_filter=None, quality_filter=None):
    """Collect, per family, the variants in ``gene_id`` matching an inheritance mode.

    Returns a dict keyed by ``(project_id, family_id)`` whose values are the
    lists produced by ``get_variants_with_inheritance_mode`` for that family.

    The filter handed in by the caller is deep-copied before ``gene_id`` is
    added to it, so repeated calls with the same filter object do not
    accumulate genes on it.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    if variant_filter is None:
        gene_filter = VariantFilter()
    else:
        gene_filter = copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    by_family = {}
    for family in family_list:
        family_t = (family.project_id, family.family_id)
        by_family[family_t] = list(
            get_variants_with_inheritance_mode(
                mall,
                family,
                inheritance_mode,
                gene_filter,
                quality_filter,
            )
        )
    return by_family
def fromJSON(spec_dict):
    """Build a ``MendelianVariantSearchSpec`` from its dict form.

    Plain fields are copied with ``spec_dict.get`` (``None`` when absent).
    The ``'variant_filter'`` entry is expanded into ``VariantFilter`` keyword
    arguments, so it must be present and be a mapping. ``allele_count_filter``
    is only set when its key exists in ``spec_dict``.
    """
    spec = MendelianVariantSearchSpec()

    # Fields that are stored exactly as they appear in the dict.
    for plain_key in ('search_mode', 'inheritance_mode',
                      'genotype_inheritance_filter', 'gene_burden_filter'):
        setattr(spec, plain_key, spec_dict.get(plain_key))

    filter_kwargs = spec_dict.get('variant_filter')
    spec.variant_filter = VariantFilter(**filter_kwargs)
    spec.genotype_quality_filter = spec_dict.get('genotype_quality_filter')

    if 'allele_count_filter' in spec_dict:
        allele_kwargs = spec_dict.get('allele_count_filter')
        spec.allele_count_filter = AlleleCountFilter(**allele_kwargs)
    return spec
def get_variants_in_gene(self, project_id, family_id, gene_id, genotype_filter=None, variant_filter=None):
    """Yield the variants of one family that match ``gene_id``, in ``unique_tuple()`` order.

    The caller's ``variant_filter`` is copied before ``gene_id`` is added to
    it. The query is built by the module-level ``_make_db_query``; each
    returned record is converted to a ``Variant`` and kept only when it
    passes the narrowed filter.
    """
    gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    db_query = _make_db_query(genotype_filter, gene_filter)
    collection = self._get_family_collection(project_id, family_id)

    # The whole result set is held in memory because mongo can't sort on
    # xpos once the result gets too big; a better approach is still needed.
    cursor = collection.find(db_query).hint([
        ('gene_ids', pymongo.ASCENDING),
        ('xpos', pymongo.ASCENDING),
    ])
    kept = []
    for record in cursor:
        candidate = Variant.fromJSON(record)
        if not passes_variant_filter(candidate, gene_filter):
            continue
        kept.append(candidate)

    kept.sort(key=lambda v: v.unique_tuple())
    for candidate in kept:
        yield candidate
def parse_variant_filter(cleaned_data):
    """
    Replace cleaned_data['variant_filter'] (a JSON string) with a VariantFilter.

    Nothing happens when the entry is missing or empty. A non-empty
    'genes_raw' entry is resolved into 'genes' and a non-empty 'regions'
    entry into 'locations'; forms.ValidationError is raised when either
    cannot be resolved.
    """
    raw_filter = cleaned_data.get('variant_filter')
    if not raw_filter:
        return

    filter_kwargs = json.loads(raw_filter)

    genes_raw = filter_kwargs.get('genes_raw')
    if genes_raw:
        ok, outcome = utils.get_gene_id_list_from_raw(genes_raw, get_reference())
        if not ok:
            # On failure the second element is what goes into the message.
            raise forms.ValidationError("{} is not a recognized gene.".format(outcome))
        filter_kwargs['genes'] = outcome
        del filter_kwargs['genes_raw']

    regions = filter_kwargs.get('regions')
    if regions:
        ok, outcome = utils.get_locations_from_raw(regions, get_reference())
        if not ok:
            raise forms.ValidationError("%s is not a recognized region" % outcome)
        filter_kwargs['locations'] = outcome
        del filter_kwargs['regions']

    cleaned_data['variant_filter'] = VariantFilter(**filter_kwargs)
def fromJSON(spec_dict):
    """Build a ``CohortGeneSearchSpec`` from its dict form.

    ``inheritance_mode`` and ``genotype_quality_filter`` are copied with
    ``spec_dict.get`` (``None`` when absent). The ``'variant_filter'`` entry
    is expanded into ``VariantFilter`` keyword arguments, so it must be
    present and be a mapping.
    """
    spec = CohortGeneSearchSpec()
    lookup = spec_dict.get

    spec.inheritance_mode = lookup('inheritance_mode')
    filter_kwargs = lookup('variant_filter')
    spec.variant_filter = VariantFilter(**filter_kwargs)
    spec.genotype_quality_filter = lookup('genotype_quality_filter')
    return spec
def fromJSON(spec_dict):
    """Build a ``DiagnosticSearchSpec`` from its dict form.

    ``gene_ids`` and ``genotype_quality_filter`` are copied with
    ``spec_dict.get`` (``None`` when absent). The ``'variant_filter'`` entry
    is expanded into ``VariantFilter`` keyword arguments, so it must be
    present and be a mapping.
    """
    spec = DiagnosticSearchSpec()
    lookup = spec_dict.get

    spec.gene_ids = lookup('gene_ids')
    filter_kwargs = lookup('variant_filter')
    spec.variant_filter = VariantFilter(**filter_kwargs)
    spec.genotype_quality_filter = lookup('genotype_quality_filter')
    return spec
def fromJSON(spec_dict):
    """Build a ``CombineMendelianFamiliesSpec`` from its dict form.

    ``inheritance_mode`` and ``genotype_quality_filter`` are copied with
    ``spec_dict.get`` (``None`` when absent). The ``'variant_filter'`` entry
    is expanded into ``VariantFilter`` keyword arguments, so it must be
    present and be a mapping.
    """
    spec = CombineMendelianFamiliesSpec()
    lookup = spec_dict.get

    spec.inheritance_mode = lookup('inheritance_mode')
    filter_kwargs = lookup('variant_filter')
    spec.variant_filter = VariantFilter(**filter_kwargs)
    spec.genotype_quality_filter = lookup('genotype_quality_filter')
    return spec
def handle_individual(self, project, individual):
    """Write the tab-separated variant report files for one individual.

    Files are created with relative paths (i.e. in the working directory):

    * ``report_for_<project_id>_<individual_id>.flagged.txt`` -- one row per
      variant of the individual's family that carries a "REPORT" tag or a
      note starting with "REPORT"; the comments column holds every note left
      on that variant. Not written when there is no such variant.
    * ``report_for_<project_id>_<individual_id>.genes.txt`` -- one row per
      variant returned by the variant store for each region listed in the
      module-level ``gene_loc`` mapping.

    Rows for which ``get_output_row`` returns ``None`` are skipped.

    Raises:
        ValueError: a flagged variant is found neither under ``project_id``
            nor under the fallback project id 'MYOSEQ_v20_previous1'.
    """
    project_id = project.project_id
    individual_id = individual.indiv_id

    print("Processing individual %s" % individual_id)

    # get variants that have been tagged or that have a note that starts with "REPORT"
    # Maps (xpos, ref, alt) -> concatenated note text (empty until filled below).
    variants_in_report_and_notes = defaultdict(str)
    for vt in VariantTag.objects.filter(project_tag__project=project,
                                        project_tag__tag="REPORT",
                                        family=individual.family):
        variants_in_report_and_notes[(vt.xpos, vt.ref, vt.alt)] = ""

    for vn in VariantNote.objects.filter(project=project, family=individual.family):
        if vn.note and vn.note.strip().startswith("REPORT"):
            variants_in_report_and_notes[(vn.xpos, vn.ref, vn.alt)] = ""

    header = [
        "gene_name", "genotype", "variant", "functional_class", "hgvs_c",
        "hgvs_p", "rsid", "exac_global_af", "exac_pop_max_af",
        "exac_pop_max_population", "clinvar_clinsig", "clinvar_clnrevstat",
        "number_of_stars", "clinvar_url", "comments"
    ]

    if variants_in_report_and_notes:
        with open("report_for_%s_%s.flagged.txt" % (project_id, individual_id), "w") as out:
            out.write("\t".join(header) + "\n")

            # retrieve text of all notes that were left for any of these variants
            for vn in VariantNote.objects.filter(project=project, family=individual.family):
                note_key = (vn.xpos, vn.ref, vn.alt)
                if vn.note and note_key in variants_in_report_and_notes:
                    other_notes = variants_in_report_and_notes[note_key]
                    if other_notes:
                        # "||" separates notes; "|" separates the fields of one note.
                        other_notes += "||"
                    variants_in_report_and_notes[note_key] = other_notes + "%s|%s|%s" % (
                        vn.date_saved, vn.user.email, vn.note.strip())

            for (xpos, ref, alt), notes in variants_in_report_and_notes.items():
                v = get_mall(project).variant_store.get_single_variant(
                    project_id, individual.family.family_id, xpos, ref, alt)
                if v is None:
                    # Fall back to the previous callset, stored under a fixed project id.
                    print("Rerieving variant from previous callset version (MYOSEQ_v20_previous1)")
                    v = get_mall(project).variant_store.get_single_variant(
                        'MYOSEQ_v20_previous1', individual.family.family_id, xpos, ref, alt)

                if v is None:
                    raise ValueError(
                        "Couldn't find variant in variant store for: %s, %s, %s %s %s"
                        % (project_id, individual.family.family_id, xpos, ref, alt))

                row = self.get_output_row(v, xpos, ref, alt, individual_id,
                                          individual.family, all_fields=True,
                                          comments=notes)
                if row is None:
                    continue

                out.write("\t".join(row) + "\n")

    # NOTE(review): the gene report is written whether or not any variant was
    # flagged above -- confirm this nesting against the original file.
    with open("report_for_%s_%s.genes.txt" % (project_id, individual_id), "w") as out:
        header = ["gene_chrom", "gene_start", "gene_end"] + header + ["json_dump"]
        out.write("\t".join(header) + "\n")

        for gene_id, (chrom, start, end) in gene_loc.items():
            xpos_start = genomeloc.get_single_location("chr" + chrom, start)
            xpos_end = genomeloc.get_single_location("chr" + chrom, end)
            variant_filter = VariantFilter(locations=[(xpos_start, xpos_end)])

            for v in get_mall(project).variant_store.get_variants(
                    project_id, individual.family.family_id,
                    variant_filter=variant_filter):
                json_dump = str(v.genotypes)

                # Use .get(): indexing a defaultdict never raises KeyError and
                # would insert an empty entry for every variant scanned here.
                notes = variants_in_report_and_notes.get((v.xpos, v.ref, v.alt), "")

                row = self.get_output_row(v, v.xpos, v.ref, v.alt, individual_id,
                                          individual.family, comments=notes,
                                          gene_id=gene_id)
                if row is None:
                    continue

                row = map(str, ["chr" + chrom.replace("chr", ""), start, end] + row + [json_dump])
                out.write("\t".join(row) + "\n")