Esempio n. 1
0
    def get_variants_in_gene(self,
                             project_id,
                             family_id,
                             gene_id,
                             genotype_filter=None,
                             variant_filter=None):
        """Yield a family's variants in one gene, ordered by ``unique_tuple()``.

        The caller's ``variant_filter`` is never modified: a deep copy of it
        (or a fresh ``VariantFilter`` when none is given) is restricted to
        ``gene_id``. That copy is used both to build the database query and
        to re-check each variant after annotations have been added.

        Args:
            project_id: project used to find the family collection and to
                annotate each variant.
            family_id: family whose collection is searched.
            gene_id: gene added to the working filter.
            genotype_filter: optional filter handed to ``_make_db_query``.
            variant_filter: optional base filter; copied, not mutated.

        Yields:
            Variants that pass the gene-restricted filter. Nothing is yielded
            when ``_get_family_collection`` returns ``None``.
        """
        if variant_filter is None:
            gene_filter = VariantFilter()
        else:
            gene_filter = copy.deepcopy(variant_filter)
        gene_filter.add_gene(gene_id)

        db_query = self._make_db_query(genotype_filter, gene_filter)
        collection = self._get_family_collection(project_id, family_id)
        # Compare with None explicitly: PyMongo 4 collection objects raise
        # NotImplementedError when they are evaluated as a boolean.
        if collection is None:
            return

        # Results are gathered and sorted in memory: per the original note,
        # Mongo cannot sort on xpos here because the result can get too big.
        # TODO: find a better way to do this.
        variants = []
        for variant_dict in collection.find(db_query).hint([
            ('db_gene_ids', pymongo.ASCENDING), ('xpos', pymongo.ASCENDING)
        ]):
            variant = Variant.fromJSON(variant_dict)
            self.add_annotations_to_variant(variant, project_id)
            if passes_variant_filter(variant, gene_filter):
                variants.append(variant)
        variants.sort(key=lambda v: v.unique_tuple())
        for v in variants:
            yield v
Esempio n. 2
0
    def get_project_variants_in_gene(self,
                                     project_id,
                                     gene_id,
                                     variant_filter=None):
        """Return the project's variants in ``gene_id`` as a sorted list.

        A private copy of ``variant_filter`` (or a new ``VariantFilter``) is
        restricted to the gene, so the caller's filter is left untouched.
        The query is written to stderr before it is run. Each document is
        converted to a ``Variant``, annotated, and kept only if it passes the
        gene-restricted filter. The result is ordered by ``unique_tuple()``.
        """
        gene_filter = (VariantFilter() if variant_filter is None
                       else copy.deepcopy(variant_filter))
        gene_filter.add_gene(gene_id)

        query = self._make_db_query(None, gene_filter)
        sys.stderr.write("Project Gene Search: " + str(project_id) +
                         " all variants query: " + str(query))
        project_collection = self._get_project_collection(project_id)

        # Results are gathered and sorted in memory: per the original note,
        # Mongo cannot sort on xpos here because the result can get too big.
        # TODO: find a better way to do this.
        cursor = project_collection.find(query).hint([
            ('db_gene_ids', pymongo.ASCENDING), ('xpos', pymongo.ASCENDING)
        ])
        matching = []
        for document in cursor:
            candidate = Variant.fromJSON(document)
            self.add_annotations_to_variant(candidate, project_id)
            if not passes_variant_filter(candidate, gene_filter):
                continue
            matching.append(candidate)
        matching.sort(key=lambda candidate: candidate.unique_tuple())
        return matching
Esempio n. 3
0
def get_variants_by_family_for_gene(mall,
                                    family_list,
                                    inheritance_mode,
                                    gene_id,
                                    variant_filter=None,
                                    quality_filter=None,
                                    user=None):
    """Collect, per family, the variants in one gene for an inheritance mode.

    The search runs with a private, gene-restricted copy of
    ``variant_filter``. The caller's filter is not modified, so reusing one
    filter across several calls no longer accumulates genes in it.

    Args:
        mall: passed through to ``get_variants_with_inheritance_mode``.
        family_list: iterable of families; each must expose ``project_id``
            and ``family_id``.
        inheritance_mode: passed through to the per-family search.
        gene_id: gene added to the working filter.
        variant_filter: optional base filter; copied, not mutated.
        quality_filter: optional, passed through to the per-family search.
        user: optional, passed through to the per-family search.

    Returns:
        dict mapping ``(project_id, family_id)`` to the list of variants
        found for that family.
    """
    # Imported locally so the block does not rely on a module-level import.
    import copy

    if variant_filter is None:
        gene_filter = VariantFilter()
    else:
        gene_filter = copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    by_family = {}
    for family in family_list:
        family_t = (family.project_id, family.family_id)
        by_family[family_t] = list(
            get_variants_with_inheritance_mode(
                mall,
                family,
                inheritance_mode,
                gene_filter,
                quality_filter,
                user=user,
            ))

    return by_family
    def get_variants_in_gene(self, project_id, family_id, gene_id, genotype_filter=None, variant_filter=None):
        """Placeholder for the family-level gene lookup; it is not implemented.

        A gene-restricted filter is still built from a private copy of
        ``variant_filter`` (or a new ``VariantFilter``) before the error is
        raised, so the caller's filter is left untouched.

        Raises:
            ValueError: always, with the message "Not Implemented".
        """
        gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
        gene_filter.add_gene(gene_id)

        # No database query is built for this lookup yet.
        raise ValueError("Not Implemented")
    def get_project_variants_in_gene(self, project_id, gene_id, variant_filter=None, user=None):
        """Return a list of the project's variants in ``gene_id``.

        A private copy of ``variant_filter`` (or a new ``VariantFilter``) is
        restricted to the gene and passed to ``get_elasticsearch_variants``
        together with ``user`` and ``max_results_limit=9999``. Whatever that
        call produces is collected into a list.
        """
        if variant_filter is not None:
            gene_filter = copy.deepcopy(variant_filter)
        else:
            gene_filter = VariantFilter()
        gene_filter.add_gene(gene_id)

        hits = self.get_elasticsearch_variants(
            project_id,
            variant_filter=gene_filter,
            user=user,
            max_results_limit=9999,
        )
        return list(hits)
Esempio n. 6
0
    def get_project_variants_in_gene(self, project_id, gene_id, variant_filter=None, user=None):
        """Return a list of the project's variants in ``gene_id``.

        The caller's ``variant_filter`` is not modified: a deep copy (or a
        new ``VariantFilter`` when none is given) gets the gene added. That
        copy is handed to ``get_elasticsearch_variants`` with ``user`` and
        ``max_results_limit=9999``; the results are gathered into a list.
        """
        if variant_filter is not None:
            gene_filter = copy.deepcopy(variant_filter)
        else:
            gene_filter = VariantFilter()
        gene_filter.add_gene(gene_id)

        search_results = self.get_elasticsearch_variants(
            project_id,
            variant_filter=gene_filter,
            user=user,
            max_results_limit=9999,
        )
        return list(search_results)
Esempio n. 7
0
    def get_variants_in_gene(self, project_id, family_id, gene_id, genotype_filter=None, variant_filter=None):
        """Family-level gene lookup that is not implemented.

        Before raising, a private copy of ``variant_filter`` (or a new
        ``VariantFilter``) is restricted to ``gene_id``; the caller's filter
        is never modified.

        Raises:
            ValueError: always, with the message "Not Implemented".
        """
        if variant_filter is not None:
            gene_filter = copy.deepcopy(variant_filter)
        else:
            gene_filter = VariantFilter()
        gene_filter.add_gene(gene_id)

        # Building and running the database query is still to be written.
        raise ValueError("Not Implemented")
Esempio n. 8
0
    def get_variants_in_gene(self, project_id, gene_id, variant_filter=None):
        """Return the project's variants in ``gene_id``, sorted by ``unique_tuple()``.

        A private copy of ``variant_filter`` (or a new ``VariantFilter``) is
        restricted to the gene, so the caller's filter is left untouched.
        Documents from the project collection are converted to ``Variant``
        objects and kept only when they pass the gene-restricted filter.
        """
        gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
        gene_filter.add_gene(gene_id)

        query = self._make_db_query(None, gene_filter)
        project_collection = self._get_project_collection(project_id)

        cursor = project_collection.find(query).hint([('gene_ids', pymongo.ASCENDING), ('xpos', pymongo.ASCENDING)])
        # The inner generator is lazy, so each document is converted and
        # filtered before the next one is fetched.
        kept = [
            candidate
            for candidate in (Variant.fromJSON(document) for document in cursor)
            if passes_variant_filter(candidate, gene_filter)
        ]
        kept.sort(key=lambda candidate: candidate.unique_tuple())
        return kept
Esempio n. 9
0
    def get_project_variants_in_gene(self, project_id, gene_id, variant_filter=None):
        """Return the project's variants in ``gene_id`` as a sorted list.

        A private copy of ``variant_filter`` (or a new ``VariantFilter``) is
        restricted to the gene, so the caller's filter is left untouched.
        The query is logged at INFO level before it is run. All matching
        documents are loaded, annotated in one ``add_annotations_to_variants``
        call, filtered again with the gene-restricted filter, and ordered by
        ``unique_tuple()``.
        """
        gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
        gene_filter.add_gene(gene_id)

        query = self._make_db_query(None, gene_filter)
        logger.info("Project Gene Search: " + str(project_id) + " all variants query: " + str(query))
        project_collection = self._get_project_collection(project_id)

        # Results are gathered and sorted in memory: per the original note,
        # Mongo cannot sort on xpos here because the result can get too big.
        # TODO: find a better way to do this.
        cursor = project_collection.find(query).hint([('db_gene_ids', pymongo.ASCENDING), ('xpos', pymongo.ASCENDING)])
        candidates = []
        for document in cursor:
            candidates.append(Variant.fromJSON(document))
        self.add_annotations_to_variants(candidates, project_id)

        kept = [candidate for candidate in candidates if passes_variant_filter(candidate, gene_filter)]
        kept.sort(key=lambda candidate: candidate.unique_tuple())
        return kept
Esempio n. 10
0
def get_variants_by_family_for_gene(mall, family_list, inheritance_mode, gene_id, variant_filter=None, quality_filter=None):
    """Collect, per family, the variants in one gene for an inheritance mode.

    The search runs with a private, gene-restricted copy of
    ``variant_filter``. The caller's filter is not modified, so reusing one
    filter across several calls no longer accumulates genes in it.

    Args:
        mall: passed through to ``get_variants_with_inheritance_mode``.
        family_list: iterable of families; each must expose ``project_id``
            and ``family_id``.
        inheritance_mode: passed through to the per-family search.
        gene_id: gene added to the working filter.
        variant_filter: optional base filter; copied, not mutated.
        quality_filter: optional, passed through to the per-family search.

    Returns:
        dict mapping ``(project_id, family_id)`` to the list of variants
        found for that family.
    """
    # Imported locally so the block does not rely on a module-level import.
    import copy

    if variant_filter is None:
        gene_filter = VariantFilter()
    else:
        gene_filter = copy.deepcopy(variant_filter)
    gene_filter.add_gene(gene_id)

    by_family = {}
    for family in family_list:
        family_t = (family.project_id, family.family_id)
        by_family[family_t] = list(get_variants_with_inheritance_mode(
            mall,
            family,
            inheritance_mode,
            gene_filter,
            quality_filter
        ))

    return by_family
Esempio n. 11
0
 def fromJSON(spec_dict):
     """Build a ``MendelianVariantSearchSpec`` from its dict form.

     Plain fields are copied with ``dict.get``, so a missing key becomes
     ``None``. ``'variant_filter'`` must be present and hold keyword
     arguments for ``VariantFilter``; unpacking a missing (``None``) value
     raises ``TypeError``. ``'allele_count_filter'`` is optional and is only
     set when the key exists.

     NOTE(review): takes no ``self``/``cls``; presumably declared as a static
     method outside this view -- confirm.
     """
     spec = MendelianVariantSearchSpec()

     # Fields stored exactly as they appear in the dict.
     for field_name in ('search_mode', 'inheritance_mode',
                        'genotype_inheritance_filter', 'gene_burden_filter'):
         setattr(spec, field_name, spec_dict.get(field_name))

     variant_filter_kwargs = spec_dict.get('variant_filter')
     spec.variant_filter = VariantFilter(**variant_filter_kwargs)
     spec.genotype_quality_filter = spec_dict.get('genotype_quality_filter')

     if 'allele_count_filter' in spec_dict:
         allele_count_kwargs = spec_dict.get('allele_count_filter')
         spec.allele_count_filter = AlleleCountFilter(**allele_count_kwargs)
     return spec
Esempio n. 12
0
    def get_variants_in_gene(self, project_id, family_id, gene_id, genotype_filter=None, variant_filter=None):
        """Yield a family's variants in ``gene_id``, ordered by ``unique_tuple()``.

        A private copy of ``variant_filter`` (or a new ``VariantFilter``) is
        restricted to the gene, so the caller's filter is left untouched.
        The query comes from the module-level ``_make_db_query``. Documents
        are converted to ``Variant`` objects and kept only when they pass the
        gene-restricted filter.
        """
        gene_filter = VariantFilter() if variant_filter is None else copy.deepcopy(variant_filter)
        gene_filter.add_gene(gene_id)

        query = _make_db_query(genotype_filter, gene_filter)
        family_collection = self._get_family_collection(project_id, family_id)

        # Results are gathered and sorted in memory: per the original note,
        # Mongo cannot sort on xpos here because the result can get too big.
        # TODO: find a better way to do this.
        cursor = family_collection.find(query).hint([('gene_ids', pymongo.ASCENDING), ('xpos', pymongo.ASCENDING)])
        kept = [
            candidate
            for candidate in (Variant.fromJSON(document) for document in cursor)
            if passes_variant_filter(candidate, gene_filter)
        ]
        kept.sort(key=lambda candidate: candidate.unique_tuple())
        for candidate in kept:
            yield candidate
Esempio n. 13
0
def parse_variant_filter(cleaned_data):
    """
    Sets cleaned_data['variant_filter'] for a form, throwing ValidationError if necessary

    When ``cleaned_data['variant_filter']`` is missing or empty, nothing is
    changed. Otherwise its JSON text is decoded and:

    * ``genes_raw`` (if non-empty) is resolved to gene ids, stored under
      ``genes``, and removed;
    * ``regions`` (if non-empty) is resolved to locations, stored under
      ``locations``, and removed;
    * the resulting dict is passed as keyword arguments to ``VariantFilter``,
      which replaces the raw text in ``cleaned_data``.

    Raises:
        forms.ValidationError: if the text is not valid JSON, does not decode
            to a JSON object, or names an unrecognized gene or region.
    """
    raw_filter = cleaned_data.get('variant_filter')
    if not raw_filter:
        return

    try:
        variant_filter_d = json.loads(raw_filter)
    except ValueError:
        # json signals malformed text with ValueError (JSONDecodeError is a
        # subclass on Python 3); report it as a form error instead.
        raise forms.ValidationError("variant_filter is not valid JSON.")
    if not isinstance(variant_filter_d, dict):
        # Anything but a JSON object cannot be used as keyword arguments.
        raise forms.ValidationError("variant_filter must be a JSON object.")

    if variant_filter_d.get('genes_raw'):
        success, result = utils.get_gene_id_list_from_raw(variant_filter_d.get('genes_raw'), get_reference())
        if not success:
            raise forms.ValidationError("{} is not a recognized gene.".format(result))
        variant_filter_d['genes'] = result
        del variant_filter_d['genes_raw']

    if variant_filter_d.get('regions'):
        success, result = utils.get_locations_from_raw(variant_filter_d.get('regions'), get_reference())
        if not success:
            raise forms.ValidationError("%s is not a recognized region" % result)
        variant_filter_d['locations'] = result
        del variant_filter_d['regions']

    cleaned_data['variant_filter'] = VariantFilter(**variant_filter_d)
Esempio n. 14
0
 def fromJSON(spec_dict):
     """Build a ``CohortGeneSearchSpec`` from its dict form.

     ``inheritance_mode`` and ``genotype_quality_filter`` are copied with
     ``dict.get`` (missing keys become ``None``). ``'variant_filter'`` must
     hold keyword arguments for ``VariantFilter``; unpacking a missing
     (``None``) value raises ``TypeError``.

     NOTE(review): takes no ``self``/``cls``; presumably declared as a static
     method outside this view -- confirm.
     """
     spec = CohortGeneSearchSpec()
     spec.inheritance_mode = spec_dict.get('inheritance_mode')

     variant_filter_kwargs = spec_dict.get('variant_filter')
     spec.variant_filter = VariantFilter(**variant_filter_kwargs)

     quality_filter = spec_dict.get('genotype_quality_filter')
     spec.genotype_quality_filter = quality_filter
     return spec
Esempio n. 15
0
 def fromJSON(spec_dict):
     """Build a ``DiagnosticSearchSpec`` from its dict form.

     ``gene_ids`` and ``genotype_quality_filter`` are copied with
     ``dict.get`` (missing keys become ``None``). ``'variant_filter'`` must
     hold keyword arguments for ``VariantFilter``; unpacking a missing
     (``None``) value raises ``TypeError``.

     NOTE(review): takes no ``self``/``cls``; presumably declared as a static
     method outside this view -- confirm.
     """
     spec = DiagnosticSearchSpec()
     spec.gene_ids = spec_dict.get('gene_ids')

     variant_filter_kwargs = spec_dict.get('variant_filter')
     spec.variant_filter = VariantFilter(**variant_filter_kwargs)

     quality_filter = spec_dict.get('genotype_quality_filter')
     spec.genotype_quality_filter = quality_filter
     return spec
Esempio n. 16
0
 def fromJSON(spec_dict):
     """Build a ``CombineMendelianFamiliesSpec`` from its dict form.

     ``inheritance_mode`` and ``genotype_quality_filter`` are copied with
     ``dict.get`` (missing keys become ``None``). ``'variant_filter'`` must
     hold keyword arguments for ``VariantFilter``; unpacking a missing
     (``None``) value raises ``TypeError``.

     NOTE(review): takes no ``self``/``cls``; presumably declared as a static
     method outside this view -- confirm.
     """
     spec = CombineMendelianFamiliesSpec()
     spec.inheritance_mode = spec_dict.get('inheritance_mode')

     variant_filter_kwargs = spec_dict.get('variant_filter')
     spec.variant_filter = VariantFilter(**variant_filter_kwargs)

     quality_filter = spec_dict.get('genotype_quality_filter')
     spec.genotype_quality_filter = quality_filter
     return spec
Esempio n. 17
0
    def handle_individual(self, project, individual):
        """Write the flagged-variant and per-gene report files for one individual.

        Two tab-separated files are written, using relative paths:

        * ``report_for_<project_id>_<individual_id>.flagged.txt`` -- only when
          at least one variant in the individual's family is tagged "REPORT"
          or has a note starting with "REPORT". One row per such variant,
          with every note left on it joined into the ``comments`` column.
        * ``report_for_<project_id>_<individual_id>.genes.txt`` -- always.
          One row per variant the variant store returns for each region in
          the module-level ``gene_loc`` mapping.

        Rows for which ``get_output_row`` returns ``None`` are skipped.

        Args:
            project: provides ``project_id``; also used to query tags and
                notes and to obtain the variant store via ``get_mall``.
            individual: provides ``indiv_id`` and ``family``.

        Raises:
            ValueError: if a flagged variant is found neither under the
                project id nor under the fallback 'MYOSEQ_v20_previous1'.
        """
        project_id = project.project_id
        individual_id = individual.indiv_id
        family = individual.family

        print("Processing individual %s" % individual_id)

        # Looked up once; the store is the same for every variant and gene.
        variant_store = get_mall(project).variant_store

        # Variants to report, keyed by (xpos, ref, alt). The value is the
        # concatenated notes text, filled in further down.
        notes_by_variant = {}
        for vt in VariantTag.objects.filter(project_tag__project=project,
                                            project_tag__tag="REPORT",
                                            family=family):
            notes_by_variant[(vt.xpos, vt.ref, vt.alt)] = ""

        for vn in VariantNote.objects.filter(project=project, family=family):
            if vn.note and vn.note.strip().startswith("REPORT"):
                notes_by_variant[(vn.xpos, vn.ref, vn.alt)] = ""

        header = [
            "gene_name", "genotype", "variant", "functional_class", "hgvs_c",
            "hgvs_p", "rsid", "exac_global_af", "exac_pop_max_af",
            "exac_pop_max_population", "clinvar_clinsig", "clinvar_clnrevstat",
            "number_of_stars", "clinvar_url", "comments"
        ]

        if notes_by_variant:
            flagged_path = "report_for_%s_%s.flagged.txt" % (project_id,
                                                             individual_id)
            with open(flagged_path, "w") as out:
                out.write("\t".join(header) + "\n")

                # Gather the text of every note left on a flagged variant,
                # joined as "date|email|note" entries separated by "||".
                for vn in VariantNote.objects.filter(project=project,
                                                     family=family):
                    key = (vn.xpos, vn.ref, vn.alt)
                    if vn.note and key in notes_by_variant:
                        other_notes = notes_by_variant[key]
                        if other_notes:
                            other_notes += "||"
                        notes_by_variant[key] = other_notes + "%s|%s|%s" % (
                            vn.date_saved, vn.user.email, vn.note.strip())

                for (xpos, ref, alt), notes in notes_by_variant.items():
                    v = variant_store.get_single_variant(
                        project_id, family.family_id, xpos, ref, alt)
                    if v is None:
                        # Fall back to the previous callset's project id.
                        print(
                            "Rerieving variant from previous callset version (MYOSEQ_v20_previous1)"
                        )
                        v = variant_store.get_single_variant(
                            'MYOSEQ_v20_previous1', family.family_id, xpos,
                            ref, alt)
                    if v is None:
                        raise ValueError(
                            "Couldn't find variant in variant store for: %s, %s, %s %s %s"
                            % (project_id, family.family_id, xpos, ref, alt))

                    row = self.get_output_row(v,
                                              xpos,
                                              ref,
                                              alt,
                                              individual_id,
                                              family,
                                              all_fields=True,
                                              comments=notes)
                    if row is None:
                        continue

                    out.write("\t".join(row) + "\n")

        genes_path = "report_for_%s_%s.genes.txt" % (project_id, individual_id)
        with open(genes_path, "w") as out:
            header = ["gene_chrom", "gene_start", "gene_end"
                      ] + header + ["json_dump"]

            out.write("\t".join(header) + "\n")
            for gene_id, (chrom, start, end) in gene_loc.items():
                # NOTE(review): "chr" is prepended unconditionally here, while
                # the output column below strips an existing "chr" first --
                # confirm gene_loc chromosomes never carry the prefix.
                xpos_start = genomeloc.get_single_location(
                    "chr" + chrom, start)
                xpos_end = genomeloc.get_single_location("chr" + chrom, end)
                variant_filter = VariantFilter(locations=[(xpos_start,
                                                           xpos_end)])
                # Leading columns are identical for every variant in the gene.
                gene_columns = ["chr" + chrom.replace("chr", ""), start, end]
                for v in variant_store.get_variants(
                        project_id,
                        family.family_id,
                        variant_filter=variant_filter):

                    json_dump = str(v.genotypes)
                    # .get() keeps the lookup read-only; variants without
                    # notes get an empty comment.
                    notes = notes_by_variant.get((v.xpos, v.ref, v.alt), "")
                    row = self.get_output_row(v,
                                              v.xpos,
                                              v.ref,
                                              v.alt,
                                              individual_id,
                                              family,
                                              comments=notes,
                                              gene_id=gene_id)
                    if row is None:
                        continue
                    row = map(str, gene_columns + row + [json_dump])

                    out.write("\t".join(row) + "\n")