コード例 #1
0
    def __get_example_panel(cls) -> Panel:
        """Assemble the hard-coded example panel.

        The returned panel is named "Panel", has version "0.2" and holds
        three genes (DPYD, FAKE and FAKE2), each created with "*1" as its
        second constructor argument.
        """
        # --- DPYD: "*2A" and "*2B" share the rs3918290 variant ---
        variant_2a = Variant("rs3918290", "T")
        variant_2b = Variant("rs1801159", "C")
        variant_3 = Variant("rs72549303", "TG")
        dpyd_gene_info = GeneInfo(
            "DPYD",
            "*1",
            frozenset([
                Haplotype("*2A", "No Function", frozenset([variant_2a])),
                Haplotype("*2B", "No Function", frozenset([variant_2a, variant_2b])),
                Haplotype("*3", "Normal Function", frozenset([variant_3])),
            ]),
            frozenset([
                RsIdInfo("rs3918290", "C", "C",
                         GeneCoordinate("1", 97915614), GeneCoordinate("chr1", 97450058)),
                RsIdInfo("rs72549309", "GATGA", "GATGA",
                         GeneCoordinate("1", 98205966), GeneCoordinate("chr1", 97740410)),
                RsIdInfo("rs1801159", "T", "T",
                         GeneCoordinate("1", 97981395), GeneCoordinate("chr1", 97515839)),
                RsIdInfo("rs72549303", "TG", "TC",
                         GeneCoordinate("1", 97915621), GeneCoordinate("chr1", 97450065)),
            ]),
            frozenset([
                DrugInfo("5-Fluorouracil",
                         "https://www.pharmgkb.org/chemical/PA128406956/guidelineAnnotation/PA166104939"),
                DrugInfo("Capecitabine",
                         "https://www.pharmgkb.org/chemical/PA448771/guidelineAnnotation/PA166104963"),
            ]),
            {"rs72549303": Annotation("6744CA>GA", "6744GA>CA")},
        )

        # --- FAKE: single haplotype, no difference annotations ---
        no_annotations: Dict[str, Annotation] = {}
        fake_gene_info = GeneInfo(
            "FAKE",
            "*1",
            frozenset([
                Haplotype("*4A", "Reduced Function", frozenset([Variant("rs1212125", "C")])),
            ]),
            frozenset([
                RsIdInfo("rs1212125", "T", "T",
                         GeneCoordinate("5", 97915617), GeneCoordinate("chr5", 97450060)),
            ]),
            frozenset([DrugInfo("Aspirin", "https://www.pharmgkb.org/some_other_url")]),
            no_annotations,
        )

        # --- FAKE2: single haplotype with one difference annotation ---
        fake2_gene_info = GeneInfo(
            "FAKE2",
            "*1",
            frozenset([
                Haplotype("*4A", "Reduced Function", frozenset([Variant("rs1212127", "C")])),
            ]),
            frozenset([
                RsIdInfo("rs1212127", "C", "T",
                         GeneCoordinate("16", 97915617), GeneCoordinate("chr16", 97450060)),
            ]),
            frozenset([DrugInfo("Aspirin", "https://www.pharmgkb.org/some_other_url")]),
            {"rs1212127": Annotation("1324C>T", "1324T>C")},
        )

        all_gene_infos = frozenset([dpyd_gene_info, fake_gene_info, fake2_gene_info])
        return Panel("Panel", "0.2", all_gene_infos)
コード例 #2
0
    def test_empty_haplotype(self) -> None:
        """A haplotype without any variants must raise ValueError."""
        haplotype_name = "*5"
        haplotype_function = "No Function"

        no_variants: FrozenSet[Variant] = frozenset()
        some_variants = frozenset({
            Variant("rs88293", "A"),
            Variant("rs39492", "T"),
        })

        # Control case: construction with a populated variant set must not raise.
        Haplotype(haplotype_name, haplotype_function, some_variants)

        with self.assertRaises(ValueError):
            Haplotype(haplotype_name, haplotype_function, no_variants)
コード例 #3
0
    def test_haplotype_with_repeat_rs_ids(self) -> None:
        """A haplotype with two variants sharing one rs id must raise ValueError."""
        haplotype_name = "*5"
        haplotype_function = "No Function"

        distinct_rs_id_variants = frozenset({
            Variant("rs88293", "A"),
            Variant("rs39492", "T"),
        })
        # Same rs id ("rs88293") twice, with different alleles.
        repeated_rs_id_variants = frozenset({
            Variant("rs88293", "A"),
            Variant("rs88293", "T"),
        })

        # Control case: distinct rs ids must not raise.
        Haplotype(haplotype_name, haplotype_function, distinct_rs_id_variants)

        with self.assertRaises(ValueError):
            Haplotype(haplotype_name, haplotype_function, repeated_rs_id_variants)
コード例 #4
0
    def test_gene_info_with_variant_that_is_ref_v38(self) -> None:
        """GeneInfo must raise ValueError when a variant allele is the ref allele.

        The two RsIdInfo sets differ only in their third constructor argument
        (presumably the v38 reference allele, going by the test name): "C" in
        the accepted case and "G", which equals the haplotype's variant
        allele, in the rejected case.
        """
        gene_name = "FAKE"
        v37_chromosome = "X"
        v38_chromosome = "chrX"
        reference_name = "*1"
        no_drugs: FrozenSet[DrugInfo] = frozenset()

        rs_id = "rs294924"
        annotations = {rs_id: Annotation("399483A>C", "399483C>A")}
        single_haplotype = frozenset({
            Haplotype("*3", "No Function", frozenset({Variant(rs_id, "G")})),
        })

        accepted_rs_id_infos = frozenset({
            RsIdInfo(
                rs_id, "A", "C",
                GeneCoordinate(v37_chromosome, 499593),
                GeneCoordinate(v38_chromosome, 399483),
            ),
        })
        rejected_rs_id_infos = frozenset({
            RsIdInfo(
                rs_id, "A", "G",
                GeneCoordinate(v37_chromosome, 499593),
                GeneCoordinate(v38_chromosome, 399483),
            ),
        })

        # Control case: the variant allele "G" differs from both given alleles.
        GeneInfo(
            gene_name, reference_name, single_haplotype,
            accepted_rs_id_infos, no_drugs, annotations,
        )

        with self.assertRaises(ValueError):
            GeneInfo(
                gene_name, reference_name, single_haplotype,
                rejected_rs_id_infos, no_drugs, annotations,
            )
コード例 #5
0
 def from_json(cls, data: Json) -> "Haplotype":
     """Create a Haplotype from its JSON representation.

     Reads the "haplotypeName", "function" and "haplotypeVariants" entries
     of ``data``. Name and function are converted with ``str``; every entry
     of "haplotypeVariants" is parsed with ``Variant.from_json`` and the
     results are collected into a frozenset.
     """
     haplotype_name = str(data["haplotypeName"])
     haplotype_function = str(data["function"])
     parsed_variants = frozenset(
         map(Variant.from_json, data["haplotypeVariants"]))
     return Haplotype(haplotype_name, haplotype_function, parsed_variants)
コード例 #6
0
    def test_gene_info_with_overlapping_haplotype_variants(self) -> None:
        """GeneInfo must raise ValueError when two haplotypes share a variant set."""
        gene_name = "FAKE"
        v37_chromosome = "X"
        v38_chromosome = "chrX"
        reference_name = "*1"
        no_drugs: FrozenSet[DrugInfo] = frozenset()
        no_annotations: Dict[str, Annotation] = {}

        first_variant = Variant("rs94982", "A")
        second_variant = Variant("rs394934", "T")
        third_variant = Variant("rs495825", "C")

        all_rs_id_infos = frozenset({
            RsIdInfo(
                first_variant.rs_id, "C", "C",
                GeneCoordinate(v37_chromosome, 4994545),
                GeneCoordinate(v38_chromosome, 2993823),
            ),
            RsIdInfo(
                second_variant.rs_id, "G", "G",
                GeneCoordinate(v37_chromosome, 3993842),
                GeneCoordinate(v38_chromosome, 2949923),
            ),
            RsIdInfo(
                third_variant.rs_id, "A", "A",
                GeneCoordinate(v37_chromosome, 293923),
                GeneCoordinate(v38_chromosome, 138812),
            ),
        })

        # "*2" and "*4" overlap in one variant but are not identical.
        distinct_haplotypes = frozenset({
            Haplotype("*2", "No Function",
                      frozenset({first_variant, second_variant})),
            Haplotype("*4", "Partial Function",
                      frozenset({first_variant, third_variant})),
        })
        # "*2" and "*4" consist of exactly the same two variants.
        identical_haplotypes = frozenset({
            Haplotype("*2", "No Function",
                      frozenset({first_variant, second_variant})),
            Haplotype("*4", "Partial Function",
                      frozenset({first_variant, second_variant})),
        })

        # Control case: must not raise.
        GeneInfo(
            gene_name, reference_name, distinct_haplotypes,
            all_rs_id_infos, no_drugs, no_annotations,
        )

        with self.assertRaises(ValueError):
            GeneInfo(
                gene_name, reference_name, identical_haplotypes,
                all_rs_id_infos, no_drugs, no_annotations,
            )
コード例 #7
0
    def test_gene_info_with_rs_id_in_haplotype_without_info(self) -> None:
        """GeneInfo must raise ValueError when a haplotype's rs id has no RsIdInfo."""
        gene_name = "FAKE"
        reference_name = "*1"
        no_rs_id_infos: FrozenSet[RsIdInfo] = frozenset()
        no_drugs: FrozenSet[DrugInfo] = frozenset()
        no_annotations: Dict[str, Annotation] = {}

        no_haplotypes: FrozenSet[Haplotype] = frozenset()
        # Uses rs id "rs238423", for which no RsIdInfo is supplied.
        one_haplotype = frozenset({
            Haplotype("*2", "No Function",
                      frozenset({Variant("rs238423", "A")})),
        })

        # Control case: without haplotypes, the empty RsIdInfo set must not raise.
        GeneInfo(
            gene_name, reference_name, no_haplotypes,
            no_rs_id_infos, no_drugs, no_annotations,
        )

        with self.assertRaises(ValueError):
            GeneInfo(
                gene_name, reference_name, one_haplotype,
                no_rs_id_infos, no_drugs, no_annotations,
            )
コード例 #8
0
 def __get_variant_to_count_for_gene(
         cls, full_call_data: FullCallData,
         gene: str) -> DefaultDict[Variant, int]:
     """Count the variant alleles found in the calls of one gene.

     Every call in ``full_call_data.calls`` whose ``gene`` attribute equals
     ``gene`` is first passed to ``cls.__assert_handleable_call``. Then each
     of its annotated alleles that is a variant versus
     ``cls.HAPLOTYPE_CALLING_REFERENCE_ASSEMBLY`` adds one to the count of
     ``Variant(call.rs_ids[0], allele)``.

     Returns:
         A defaultdict mapping each encountered Variant to its count.

     Raises:
         ValueError: If an annotated allele is not annotated versus the
             reference assembly.
     """
     reference_assembly = cls.HAPLOTYPE_CALLING_REFERENCE_ASSEMBLY
     # Collected into a set, so equal calls are only processed once.
     calls_of_gene = {
         candidate
         for candidate in full_call_data.calls
         if candidate.gene == gene
     }

     counts: DefaultDict[Variant, int] = collections.defaultdict(int)
     for call in calls_of_gene:
         cls.__assert_handleable_call(call)
         for annotated_allele in call.get_annotated_alleles():
             if not annotated_allele.is_annotated_vs(reference_assembly):
                 raise ValueError(
                     f"Unknown variant: allele={annotated_allele}")
             if not annotated_allele.is_variant_vs(reference_assembly):
                 # Alleles matching the reference are not counted.
                 continue
             # Only the first rs id of the call identifies the variant.
             variant = Variant(call.rs_ids[0], annotated_allele.allele)
             counts[variant] += 1
     return counts
コード例 #9
0
    def test_load_panel(self) -> None:
        """Loading test_panel.json must yield the expected Panel."""
        loaded_panel = load_panel(str(get_test_resource("test_panel.json")))

        # --- DPYD ---
        variant_2a = Variant("rs3918290", "T")
        variant_2b = Variant("rs1801159", "C")
        variant_3 = Variant("rs72549303", "TG")
        expected_dpyd = GeneInfo(
            "DPYD",
            "*1",
            frozenset([
                Haplotype("*2A", "No Function", frozenset([variant_2a])),
                Haplotype("*2B", "No Function",
                          frozenset([variant_2a, variant_2b])),
                Haplotype("*3", "Normal Function", frozenset([variant_3])),
            ]),
            frozenset([
                RsIdInfo("rs3918290", "C", "C",
                         GeneCoordinate("1", 97915614),
                         GeneCoordinate("chr1", 97450058)),
                RsIdInfo("rs72549309", "GATGA", "GATGA",
                         GeneCoordinate("1", 98205966),
                         GeneCoordinate("chr1", 97740410)),
                RsIdInfo("rs1801159", "T", "T",
                         GeneCoordinate("1", 97981395),
                         GeneCoordinate("chr1", 97515839)),
                RsIdInfo("rs72549303", "TG", "TC",
                         GeneCoordinate("1", 97915621),
                         GeneCoordinate("chr1", 97450065)),
                RsIdInfo("rs1801265", "G", "A",
                         GeneCoordinate("1", 98348885),
                         GeneCoordinate("chr1", 97883329)),
            ]),
            frozenset([
                DrugInfo("5-Fluorouracil",
                         "https://www.source_url.org/5-Fluorouracil"),
                DrugInfo("Capecitabine",
                         "https://www.source_url.org/Capecitabine"),
            ]),
            {
                "rs72549303": Annotation("6744CA>GA", "6744GA>CA"),
                "rs1801265": Annotation("85C>T", "85T>C"),
            },
        )

        # --- FAKE ---
        no_annotations: Dict[str, Annotation] = {}
        expected_fake = GeneInfo(
            "FAKE",
            "*1",
            frozenset([
                Haplotype("*4A", "Reduced Function",
                          frozenset([Variant("rs1212125", "C")])),
            ]),
            frozenset([
                RsIdInfo("rs1212125", "T", "T",
                         GeneCoordinate("5", 97915617),
                         GeneCoordinate("chr5", 97450060)),
            ]),
            frozenset([
                DrugInfo("Aspirin", "https://www.source_url.org/Aspirin"),
            ]),
            no_annotations,
        )

        # --- FAKE2 ---
        expected_fake2 = GeneInfo(
            "FAKE2",
            "*1",
            frozenset([
                Haplotype("*4A", "Reduced Function",
                          frozenset([Variant("rs1212127", "C")])),
            ]),
            frozenset([
                RsIdInfo("rs1212127", "C", "T",
                         GeneCoordinate("16", 97915617),
                         GeneCoordinate("chr16", 97450060)),
            ]),
            frozenset([
                DrugInfo("Aspirin", "https://www.source_url.org/Aspirin"),
            ]),
            {"rs1212127": Annotation("1324C>T", "1324T>C")},
        )

        expected_panel = Panel(
            "fake_panel",
            "0.3",
            frozenset([expected_dpyd, expected_fake, expected_fake2]),
        )

        self.assertEqual(expected_panel, loaded_panel)