def __get_example_panel(cls) -> Panel:
    """Assemble the hard-coded example panel named "Panel", version "0.2".

    The panel contains three genes (DPYD, FAKE and FAKE2). Each uses "*1" as
    reference haplotype name and gets its own haplotypes, rs id infos, drugs
    and rs id to difference annotation mapping.
    """
    reference_haplotype_name = "*1"

    # --- DPYD ---
    two_a = Variant("rs3918290", "T")
    two_b = Variant("rs1801159", "C")
    three = Variant("rs72549303", "TG")
    dpyd_gene_info = GeneInfo(
        "DPYD",
        reference_haplotype_name,
        frozenset([
            Haplotype("*2A", "No Function", frozenset([two_a])),
            Haplotype("*2B", "No Function", frozenset([two_a, two_b])),
            Haplotype("*3", "Normal Function", frozenset([three])),
        ]),
        frozenset([
            RsIdInfo(
                "rs3918290", "C", "C",
                GeneCoordinate("1", 97915614), GeneCoordinate("chr1", 97450058),
            ),
            RsIdInfo(
                "rs72549309", "GATGA", "GATGA",
                GeneCoordinate("1", 98205966), GeneCoordinate("chr1", 97740410),
            ),
            RsIdInfo(
                "rs1801159", "T", "T",
                GeneCoordinate("1", 97981395), GeneCoordinate("chr1", 97515839),
            ),
            RsIdInfo(
                "rs72549303", "TG", "TC",
                GeneCoordinate("1", 97915621), GeneCoordinate("chr1", 97450065),
            ),
        ]),
        frozenset([
            DrugInfo(
                "5-Fluorouracil",
                "https://www.pharmgkb.org/chemical/PA128406956/guidelineAnnotation/PA166104939",
            ),
            DrugInfo(
                "Capecitabine",
                "https://www.pharmgkb.org/chemical/PA448771/guidelineAnnotation/PA166104963",
            ),
        ]),
        {"rs72549303": Annotation("6744CA>GA", "6744GA>CA")},
    )

    # --- FAKE ---
    # This gene has no rs id to difference annotations at all.
    no_annotations: Dict[str, Annotation] = {}
    fake_gene_info = GeneInfo(
        "FAKE",
        reference_haplotype_name,
        frozenset([
            Haplotype("*4A", "Reduced Function", frozenset([Variant("rs1212125", "C")])),
        ]),
        frozenset([
            RsIdInfo(
                "rs1212125", "T", "T",
                GeneCoordinate("5", 97915617), GeneCoordinate("chr5", 97450060),
            ),
        ]),
        frozenset([DrugInfo("Aspirin", "https://www.pharmgkb.org/some_other_url")]),
        no_annotations,
    )

    # --- FAKE2 ---
    fake2_gene_info = GeneInfo(
        "FAKE2",
        reference_haplotype_name,
        frozenset([
            Haplotype("*4A", "Reduced Function", frozenset([Variant("rs1212127", "C")])),
        ]),
        frozenset([
            RsIdInfo(
                "rs1212127", "C", "T",
                GeneCoordinate("16", 97915617), GeneCoordinate("chr16", 97450060),
            ),
        ]),
        frozenset([DrugInfo("Aspirin", "https://www.pharmgkb.org/some_other_url")]),
        {"rs1212127": Annotation("1324C>T", "1324T>C")},
    )

    return Panel(
        "Panel",
        "0.2",
        frozenset([dpyd_gene_info, fake_gene_info, fake2_gene_info]),
    )
def test_empty_haplotype(self) -> None:
    """A haplotype without any variants must be rejected with ValueError."""
    haplotype_name = "*5"
    haplotype_function = "No Function"
    two_variants = frozenset({Variant("rs88293", "A"), Variant("rs39492", "T")})
    no_variants: FrozenSet[Variant] = frozenset()

    # Control case: a non-empty variant set is accepted without raising.
    Haplotype(haplotype_name, haplotype_function, two_variants)

    with self.assertRaises(ValueError):
        Haplotype(haplotype_name, haplotype_function, no_variants)
def test_haplotype_with_repeat_rs_ids(self) -> None:
    """A haplotype holding two variants with the same rs id must be rejected."""
    haplotype_name = "*5"
    haplotype_function = "No Function"
    distinct_rs_ids = frozenset({Variant("rs88293", "A"), Variant("rs39492", "T")})
    # Same rs id twice, differing only in the variant allele.
    repeated_rs_id = frozenset({Variant("rs88293", "A"), Variant("rs88293", "T")})

    # Control case: variants with distinct rs ids are accepted without raising.
    Haplotype(haplotype_name, haplotype_function, distinct_rs_ids)

    with self.assertRaises(ValueError):
        Haplotype(haplotype_name, haplotype_function, repeated_rs_id)
def test_gene_info_with_variant_that_is_ref_v38(self) -> None:
    """GeneInfo must reject a haplotype variant whose allele is the ref allele."""
    rs_id = "rs294924"
    coordinate_v37 = GeneCoordinate("X", 499593)
    coordinate_v38 = GeneCoordinate("chrX", 399483)

    # The two infos differ only in the third RsIdInfo argument: "C" does not
    # collide with the haplotype's variant allele "G", whereas "G" does.
    non_clashing_info = RsIdInfo(rs_id, "A", "C", coordinate_v37, coordinate_v38)
    clashing_info = RsIdInfo(rs_id, "A", "G", coordinate_v37, coordinate_v38)
    non_clashing_rs_id_infos = frozenset({non_clashing_info})
    clashing_rs_id_infos = frozenset({clashing_info})

    haplotypes = frozenset({
        Haplotype("*3", "No Function", frozenset({Variant("rs294924", "G")})),
    })
    annotations = {rs_id: Annotation("399483A>C", "399483C>A")}
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    gene_name = "FAKE"
    reference_name = "*1"

    # Control case: construction succeeds when the alleles do not collide.
    GeneInfo(
        gene_name, reference_name, haplotypes, non_clashing_rs_id_infos, no_drugs, annotations,
    )

    with self.assertRaises(ValueError):
        GeneInfo(
            gene_name, reference_name, haplotypes, clashing_rs_id_infos, no_drugs, annotations,
        )
def test_gene_info_with_overlapping_haplotype_variants(self) -> None:
    """GeneInfo must reject two haplotypes with an identical variant combination."""
    gene_name = "FAKE"
    reference_name = "*1"
    v37_chromosome = "X"
    v38_chromosome = "chrX"
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    no_annotations: Dict[str, Annotation] = dict()

    first = Variant("rs94982", "A")
    second = Variant("rs394934", "T")
    third = Variant("rs495825", "C")

    rs_id_infos = frozenset({
        RsIdInfo(
            first.rs_id, "C", "C",
            GeneCoordinate(v37_chromosome, 4994545),
            GeneCoordinate(v38_chromosome, 2993823),
        ),
        RsIdInfo(
            second.rs_id, "G", "G",
            GeneCoordinate(v37_chromosome, 3993842),
            GeneCoordinate(v38_chromosome, 2949923),
        ),
        RsIdInfo(
            third.rs_id, "A", "A",
            GeneCoordinate(v37_chromosome, 293923),
            GeneCoordinate(v38_chromosome, 138812),
        ),
    })

    # "*2" and "*4" share one variant but differ in the other.
    distinct_combinations = frozenset({
        Haplotype("*2", "No Function", frozenset({first, second})),
        Haplotype("*4", "Partial Function", frozenset({first, third})),
    })
    # "*2" and "*4" consist of exactly the same variants.
    identical_combinations = frozenset({
        Haplotype("*2", "No Function", frozenset({first, second})),
        Haplotype("*4", "Partial Function", frozenset({first, second})),
    })

    # Control case: differing variant combinations are accepted without raising.
    GeneInfo(
        gene_name, reference_name, distinct_combinations, rs_id_infos, no_drugs, no_annotations,
    )

    with self.assertRaises(ValueError):
        GeneInfo(
            gene_name, reference_name, identical_combinations, rs_id_infos, no_drugs,
            no_annotations,
        )
def test_gene_info_with_rs_id_in_haplotype_without_info(self) -> None:
    """GeneInfo must reject a haplotype rs id that has no matching RsIdInfo."""
    gene_name = "FAKE"
    reference_name = "*1"
    no_rs_id_infos: FrozenSet[RsIdInfo] = frozenset()
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    no_annotations: Dict[str, Annotation] = dict()

    no_haplotypes: FrozenSet[Haplotype] = frozenset()
    haplotype_using_unknown_rs_id = Haplotype(
        "*2", "No Function", frozenset({Variant("rs238423", "A")}),
    )
    some_haplotypes = frozenset({haplotype_using_unknown_rs_id})

    # Control case: with no haplotypes there is no rs id lacking an info object.
    GeneInfo(gene_name, reference_name, no_haplotypes, no_rs_id_infos, no_drugs, no_annotations)

    with self.assertRaises(ValueError):
        GeneInfo(
            gene_name, reference_name, some_haplotypes, no_rs_id_infos, no_drugs, no_annotations,
        )
def __get_variant_to_count_for_gene(
        cls, full_call_data: FullCallData, gene: str) -> DefaultDict[Variant, int]:
    """Count the variant alleles found in the calls for a single gene.

    Only calls whose ``gene`` attribute equals ``gene`` are considered. Each
    annotated allele of such a call that is a variant with respect to
    ``cls.HAPLOTYPE_CALLING_REFERENCE_ASSEMBLY`` adds one to the count of the
    ``Variant`` built from the call's first rs id and that allele.

    Raises:
        ValueError: if an annotated allele is not annotated with respect to
            the haplotype calling reference assembly.
        Any exception ``cls.__assert_handleable_call`` raises for a call.
    """
    # Collected into a set, so calls that compare equal are only processed once.
    calls_of_gene = {
        candidate for candidate in full_call_data.calls if candidate.gene == gene
    }

    counts: DefaultDict[Variant, int] = collections.defaultdict(int)
    for gene_call in calls_of_gene:
        cls.__assert_handleable_call(gene_call)
        for allele in gene_call.get_annotated_alleles():
            if not allele.is_annotated_vs(cls.HAPLOTYPE_CALLING_REFERENCE_ASSEMBLY):
                raise ValueError(f"Unknown variant: allele={allele}")
            if not allele.is_variant_vs(cls.HAPLOTYPE_CALLING_REFERENCE_ASSEMBLY):
                # Alleles matching the reference assembly are not counted.
                continue
            counted_variant = Variant(gene_call.rs_ids[0], allele.allele)
            counts[counted_variant] += 1
    return counts
def test_load_panel(self) -> None:
    """Loading test_panel.json yields the expected "fake_panel" version "0.3"."""
    loaded_panel = load_panel(str(get_test_resource("test_panel.json")))

    reference_haplotype_name = "*1"

    # --- expected DPYD gene info ---
    two_a = Variant("rs3918290", "T")
    two_b = Variant("rs1801159", "C")
    three = Variant("rs72549303", "TG")
    dpyd_expected = GeneInfo(
        "DPYD",
        reference_haplotype_name,
        frozenset([
            Haplotype("*2A", "No Function", frozenset([two_a])),
            Haplotype("*2B", "No Function", frozenset([two_a, two_b])),
            Haplotype("*3", "Normal Function", frozenset([three])),
        ]),
        frozenset([
            RsIdInfo(
                "rs3918290", "C", "C",
                GeneCoordinate("1", 97915614), GeneCoordinate("chr1", 97450058),
            ),
            RsIdInfo(
                "rs72549309", "GATGA", "GATGA",
                GeneCoordinate("1", 98205966), GeneCoordinate("chr1", 97740410),
            ),
            RsIdInfo(
                "rs1801159", "T", "T",
                GeneCoordinate("1", 97981395), GeneCoordinate("chr1", 97515839),
            ),
            RsIdInfo(
                "rs72549303", "TG", "TC",
                GeneCoordinate("1", 97915621), GeneCoordinate("chr1", 97450065),
            ),
            RsIdInfo(
                "rs1801265", "G", "A",
                GeneCoordinate("1", 98348885), GeneCoordinate("chr1", 97883329),
            ),
        ]),
        frozenset([
            DrugInfo("5-Fluorouracil", "https://www.source_url.org/5-Fluorouracil"),
            DrugInfo("Capecitabine", "https://www.source_url.org/Capecitabine"),
        ]),
        {
            "rs72549303": Annotation("6744CA>GA", "6744GA>CA"),
            "rs1801265": Annotation("85C>T", "85T>C"),
        },
    )

    # --- expected FAKE gene info ---
    # This gene has no rs id to difference annotations at all.
    no_annotations: Dict[str, Annotation] = {}
    fake_expected = GeneInfo(
        "FAKE",
        reference_haplotype_name,
        frozenset([
            Haplotype("*4A", "Reduced Function", frozenset([Variant("rs1212125", "C")])),
        ]),
        frozenset([
            RsIdInfo(
                "rs1212125", "T", "T",
                GeneCoordinate("5", 97915617), GeneCoordinate("chr5", 97450060),
            ),
        ]),
        frozenset([DrugInfo("Aspirin", "https://www.source_url.org/Aspirin")]),
        no_annotations,
    )

    # --- expected FAKE2 gene info ---
    fake2_expected = GeneInfo(
        "FAKE2",
        reference_haplotype_name,
        frozenset([
            Haplotype("*4A", "Reduced Function", frozenset([Variant("rs1212127", "C")])),
        ]),
        frozenset([
            RsIdInfo(
                "rs1212127", "C", "T",
                GeneCoordinate("16", 97915617), GeneCoordinate("chr16", 97450060),
            ),
        ]),
        frozenset([DrugInfo("Aspirin", "https://www.source_url.org/Aspirin")]),
        {"rs1212127": Annotation("1324C>T", "1324T>C")},
    )

    expected_panel = Panel(
        "fake_panel",
        "0.3",
        frozenset([dpyd_expected, fake_expected, fake2_expected]),
    )
    self.assertEqual(expected_panel, loaded_panel)