def test_gene_info_with_variant_that_is_ref_v38(self) -> None:
    """Error when gene info has variant where variant allele is the ref allele

    The single haplotype variant carries allele "G". Building the GeneInfo
    must succeed when the rs id info alleles are "A"/"C" and must raise
    ValueError when they are "A"/"G".
    """
    gene_name = "FAKE"
    wild_type_name = "*1"
    rs_id = "rs294924"
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    annotations = {rs_id: Annotation("399483A>C", "399483C>A")}
    haplotypes = frozenset({
        Haplotype("*3", "No Function", frozenset({Variant("rs294924", "G")})),
    })

    def rs_id_info_with_second_allele(allele: str) -> RsIdInfo:
        # Only the second allele differs between the accepted and the
        # rejected configuration; everything else is held constant.
        return RsIdInfo(
            rs_id,
            "A",
            allele,
            GeneCoordinate("X", 499593),
            GeneCoordinate("chrX", 399483),
        )

    accepted_rs_id_infos = frozenset({rs_id_info_with_second_allele("C")})
    rejected_rs_id_infos = frozenset({rs_id_info_with_second_allele("G")})

    # Sanity check: the baseline configuration is valid.
    GeneInfo(
        gene_name, wild_type_name, haplotypes, accepted_rs_id_infos, no_drugs, annotations
    )
    with self.assertRaises(ValueError):
        GeneInfo(
            gene_name, wild_type_name, haplotypes, rejected_rs_id_infos, no_drugs, annotations
        )
def test_gene_info_with_overlapping_rs_id_infos(self) -> None:
    """Error when gene info has rs id infos for which the relevant coordinates overlap

    A GeneInfo with only the single-base rs id info is accepted; adding a
    two-base rs id info that starts one position earlier must raise
    ValueError.
    """
    gene_name = "FAKE"
    wild_type_name = "*1"
    no_haplotypes: FrozenSet[Haplotype] = frozenset()
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    no_annotations: Dict[str, Annotation] = {}

    single_base_info = RsIdInfo(
        "rs294924",
        "A",
        "A",
        GeneCoordinate("X", 499593),
        GeneCoordinate("chrX", 399483),
    )
    # Two-base allele starting one position before single_base_info.
    two_base_info = RsIdInfo(
        "rs294927",
        "AA",
        "AA",
        GeneCoordinate("X", 499592),
        GeneCoordinate("chrX", 399482),
    )
    rs_id_infos_without_overlap = frozenset({single_base_info})
    rs_id_infos_with_overlap = frozenset({single_base_info, two_base_info})

    # Sanity check: a lone rs id info is valid.
    GeneInfo(
        gene_name,
        wild_type_name,
        no_haplotypes,
        rs_id_infos_without_overlap,
        no_drugs,
        no_annotations,
    )
    with self.assertRaises(ValueError):
        GeneInfo(
            gene_name,
            wild_type_name,
            no_haplotypes,
            rs_id_infos_with_overlap,
            no_drugs,
            no_annotations,
        )
def test_gene_info_with_rs_id_infos_for_different_chromosome(self) -> None:
    """Error when the rs id infos of one gene info are on different chromosomes

    Each rs id info is accepted on its own, whichever chromosome names it
    uses. A ValueError is expected when infos with differing v37 and/or v38
    chromosomes are combined in a single GeneInfo.
    """
    no_haplotypes: FrozenSet[Haplotype] = frozenset()
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    no_annotations: Dict[str, Annotation] = {}

    def rs_id_info_on(chromosome_v37: str, chromosome_v38: str) -> RsIdInfo:
        # Identical rs id, alleles and positions; only the chromosomes vary.
        return RsIdInfo(
            "rs294924",
            "A",
            "A",
            GeneCoordinate(chromosome_v37, 499593),
            GeneCoordinate(chromosome_v38, 399483),
        )

    def gene_info_for(*rs_id_infos: RsIdInfo) -> GeneInfo:
        return GeneInfo(
            "FAKE", "*1", no_haplotypes, frozenset(rs_id_infos), no_drugs, no_annotations
        )

    baseline = rs_id_info_on("X", "chrX")
    other_v37 = rs_id_info_on("1", "chrX")
    other_v38 = rs_id_info_on("X", "1")
    other_both = rs_id_info_on("1", "1")

    # Every info is valid when it is the only one in the gene info.
    for lone_info in (baseline, other_v37, other_v38, other_both):
        gene_info_for(lone_info)

    # NOTE(review): all infos share the same rs id, so the ValueError below
    # could in principle originate from a check other than the chromosome
    # one - confirm against the GeneInfo validation.
    mixed_combinations = (
        (baseline, other_v37),
        (baseline, other_v38),
        (baseline, other_v37, other_v38, other_both),
    )
    for combination in mixed_combinations:
        with self.assertRaises(ValueError):
            gene_info_for(*combination)
def test_gene_info_with_overlapping_haplotype_variants(self) -> None:
    """Error when different haplotypes have the exact same variant combination

    Two haplotypes that share one variant but differ in another are accepted;
    two haplotypes with identical variant sets must raise ValueError.
    """
    gene_name = "FAKE"
    wild_type_name = "*1"
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    no_annotations: Dict[str, Annotation] = {}

    first = Variant("rs94982", "A")
    second = Variant("rs394934", "T")
    third = Variant("rs495825", "C")

    # (variant, allele used for both allele arguments, v37 position, v38 position)
    rs_id_info_rows = (
        (first, "C", 4994545, 2993823),
        (second, "G", 3993842, 2949923),
        (third, "A", 293923, 138812),
    )
    rs_id_infos = frozenset(
        RsIdInfo(
            variant.rs_id,
            allele,
            allele,
            GeneCoordinate("X", position_v37),
            GeneCoordinate("chrX", position_v38),
        )
        for variant, allele, position_v37, position_v38 in rs_id_info_rows
    )

    distinct_haplotypes = frozenset({
        Haplotype("*2", "No Function", frozenset({first, second})),
        Haplotype("*4", "Partial Function", frozenset({first, third})),
    })
    clashing_haplotypes = frozenset({
        Haplotype("*2", "No Function", frozenset({first, second})),
        Haplotype("*4", "Partial Function", frozenset({first, second})),
    })

    # Sanity check: haplotypes with different variant sets are valid.
    GeneInfo(
        gene_name, wild_type_name, distinct_haplotypes, rs_id_infos, no_drugs, no_annotations
    )
    with self.assertRaises(ValueError):
        GeneInfo(
            gene_name, wild_type_name, clashing_haplotypes, rs_id_infos, no_drugs, no_annotations
        )
def __get_example_panel(cls) -> Panel:
    """Assemble the example Panel ("Panel", version "0.2") used as a fixture.

    The panel holds three gene infos - DPYD, FAKE and FAKE2 - each with "*1"
    as its reference haplotype name.
    """
    # --- DPYD ---
    dpyd_two_a = Variant("rs3918290", "T")
    dpyd_two_b = Variant("rs1801159", "C")
    dpyd_three = Variant("rs72549303", "TG")
    dpyd_gene_info = GeneInfo(
        "DPYD",
        "*1",
        frozenset({
            Haplotype("*2A", "No Function", frozenset({dpyd_two_a})),
            Haplotype("*2B", "No Function", frozenset({dpyd_two_a, dpyd_two_b})),
            Haplotype("*3", "Normal Function", frozenset({dpyd_three})),
        }),
        frozenset({
            RsIdInfo(
                "rs3918290", "C", "C",
                GeneCoordinate("1", 97915614), GeneCoordinate("chr1", 97450058),
            ),
            RsIdInfo(
                "rs72549309", "GATGA", "GATGA",
                GeneCoordinate("1", 98205966), GeneCoordinate("chr1", 97740410),
            ),
            RsIdInfo(
                "rs1801159", "T", "T",
                GeneCoordinate("1", 97981395), GeneCoordinate("chr1", 97515839),
            ),
            RsIdInfo(
                "rs72549303", "TG", "TC",
                GeneCoordinate("1", 97915621), GeneCoordinate("chr1", 97450065),
            ),
        }),
        frozenset({
            DrugInfo(
                "5-Fluorouracil",
                "https://www.pharmgkb.org/chemical/PA128406956/guidelineAnnotation/PA166104939",
            ),
            DrugInfo(
                "Capecitabine",
                "https://www.pharmgkb.org/chemical/PA448771/guidelineAnnotation/PA166104963",
            ),
        }),
        {"rs72549303": Annotation("6744CA>GA", "6744GA>CA")},
    )

    # --- FAKE ---
    fake_annotations: Dict[str, Annotation] = {}
    fake_gene_info = GeneInfo(
        "FAKE",
        "*1",
        frozenset({
            Haplotype("*4A", "Reduced Function", frozenset({Variant("rs1212125", "C")})),
        }),
        frozenset({
            RsIdInfo(
                "rs1212125", "T", "T",
                GeneCoordinate("5", 97915617), GeneCoordinate("chr5", 97450060),
            ),
        }),
        frozenset({DrugInfo("Aspirin", "https://www.pharmgkb.org/some_other_url")}),
        fake_annotations,
    )

    # --- FAKE2 ---
    fake2_gene_info = GeneInfo(
        "FAKE2",
        "*1",
        frozenset({
            Haplotype("*4A", "Reduced Function", frozenset({Variant("rs1212127", "C")})),
        }),
        frozenset({
            RsIdInfo(
                "rs1212127", "C", "T",
                GeneCoordinate("16", 97915617), GeneCoordinate("chr16", 97450060),
            ),
        }),
        frozenset({DrugInfo("Aspirin", "https://www.pharmgkb.org/some_other_url")}),
        {"rs1212127": Annotation("1324C>T", "1324T>C")},
    )

    return Panel(
        "Panel",
        "0.2",
        frozenset({dpyd_gene_info, fake_gene_info, fake2_gene_info}),
    )
def from_json(cls, data: Json) -> "GeneInfo":
    """Build a GeneInfo from its JSON representation.

    Reads the keys "gene", "chromosomeV37", "chromosomeV38",
    "wildTypeHaplotype", "variants", "haplotypes", "drugs" and
    "refSeqDifferenceAnnotations" from ``data``. The two chromosome names are
    passed on to ``RsIdInfo.from_json`` for every entry of "variants".

    Returns:
        The GeneInfo constructed from the parsed parts.
    """
    # Scalar fields first; the chromosome names are needed for the variants.
    gene_name = str(data["gene"])
    v37_chromosome = str(data["chromosomeV37"])
    v38_chromosome = str(data["chromosomeV38"])
    reference_haplotype_name = str(data["wildTypeHaplotype"])

    parsed_rs_id_infos = frozenset(
        RsIdInfo.from_json(variant_json, v37_chromosome, v38_chromosome)
        for variant_json in data["variants"]
    )
    parsed_haplotypes = frozenset(
        Haplotype.from_json(entry) for entry in data["haplotypes"]
    )
    parsed_drugs = frozenset(DrugInfo.from_json(entry) for entry in data["drugs"])

    # Annotations are keyed by the rs id they belong to.
    annotation_by_rs_id = {
        str(entry["rsid"]): Annotation.from_json(entry)
        for entry in data["refSeqDifferenceAnnotations"]
    }

    return GeneInfo(
        gene_name,
        reference_haplotype_name,
        parsed_haplotypes,
        parsed_rs_id_infos,
        parsed_drugs,
        annotation_by_rs_id,
    )
def test_gene_info_with_ref_seq_difference_without_annotation(self) -> None:
    """Error when a ref seq difference does not have an annotation

    With an empty annotation mapping, a GeneInfo without rs id infos is
    accepted, while one whose rs id info has differing alleles ("A" vs "C")
    must raise ValueError.
    """
    gene_name = "FAKE"
    wild_type_name = "*1"
    no_haplotypes: FrozenSet[Haplotype] = frozenset()
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    no_annotations: Dict[str, Annotation] = {}

    no_rs_id_infos: FrozenSet[RsIdInfo] = frozenset()
    rs_id_infos_with_difference = frozenset({
        RsIdInfo(
            "rs294924",
            "A",
            "C",
            GeneCoordinate("X", 499593),
            GeneCoordinate("chrX", 399483),
        ),
    })

    # Sanity check: nothing to annotate, so the empty mapping is sufficient.
    GeneInfo(
        gene_name, wild_type_name, no_haplotypes, no_rs_id_infos, no_drugs, no_annotations
    )
    with self.assertRaises(ValueError):
        GeneInfo(
            gene_name,
            wild_type_name,
            no_haplotypes,
            rs_id_infos_with_difference,
            no_drugs,
            no_annotations,
        )
def test_panel_with_overlapping_rs_id_infos_for_different_genes(self) -> None:
    """Error when panel has overlapping rs id infos for different genes

    No error is expected when the shared rs id info is exactly the same
    object value in both genes; a ValueError is expected when the second gene
    has a different rs id info at adjacent coordinates.
    """
    panel_name = "FakePanel"
    panel_version = "1.0"
    wild_type_name = "*1"
    no_haplotypes: FrozenSet[Haplotype] = frozenset()
    no_drugs: FrozenSet[DrugInfo] = frozenset()
    no_annotations: Dict[str, Annotation] = {}

    shared_info = RsIdInfo(
        "rs294924", "AT", "AT", GeneCoordinate("X", 499593), GeneCoordinate("chrX", 399483)
    )
    unrelated_info = RsIdInfo(
        "rs3949923", "C", "C", GeneCoordinate("X", 293993), GeneCoordinate("chrX", 1388323)
    )
    # Two-base allele starting one position before shared_info.
    adjacent_info = RsIdInfo(
        "rs12993", "GG", "GG", GeneCoordinate("X", 499592), GeneCoordinate("chrX", 399482)
    )

    fake_gene_info = GeneInfo(
        "FAKE",
        wild_type_name,
        no_haplotypes,
        frozenset({shared_info}),
        no_drugs,
        no_annotations,
    )
    other_with_same_info = GeneInfo(
        "OTHER",
        wild_type_name,
        no_haplotypes,
        frozenset({shared_info, unrelated_info}),
        no_drugs,
        no_annotations,
    )
    other_with_adjacent_info = GeneInfo(
        "OTHER",
        wild_type_name,
        no_haplotypes,
        frozenset({adjacent_info}),
        no_drugs,
        no_annotations,
    )

    # Identical rs id info in two genes is allowed.
    Panel(panel_name, panel_version, frozenset({fake_gene_info, other_with_same_info}))
    with self.assertRaises(ValueError):
        Panel(
            panel_name,
            panel_version,
            frozenset({fake_gene_info, other_with_adjacent_info}),
        )
def test_load_panel(self) -> None:
    """Load panel from json

    Loads ``test_panel.json`` from the test resources and compares it with a
    Panel ("fake_panel", version "0.3") assembled by hand from three gene
    infos: DPYD, FAKE and FAKE2.
    """
    loaded_panel = load_panel(str(get_test_resource("test_panel.json")))

    # --- DPYD ---
    dpyd_two_a = Variant("rs3918290", "T")
    dpyd_two_b = Variant("rs1801159", "C")
    dpyd_three = Variant("rs72549303", "TG")
    expected_dpyd = GeneInfo(
        "DPYD",
        "*1",
        frozenset({
            Haplotype("*2A", "No Function", frozenset({dpyd_two_a})),
            Haplotype("*2B", "No Function", frozenset({dpyd_two_a, dpyd_two_b})),
            Haplotype("*3", "Normal Function", frozenset({dpyd_three})),
        }),
        frozenset({
            RsIdInfo(
                "rs3918290", "C", "C",
                GeneCoordinate("1", 97915614), GeneCoordinate("chr1", 97450058),
            ),
            RsIdInfo(
                "rs72549309", "GATGA", "GATGA",
                GeneCoordinate("1", 98205966), GeneCoordinate("chr1", 97740410),
            ),
            RsIdInfo(
                "rs1801159", "T", "T",
                GeneCoordinate("1", 97981395), GeneCoordinate("chr1", 97515839),
            ),
            RsIdInfo(
                "rs72549303", "TG", "TC",
                GeneCoordinate("1", 97915621), GeneCoordinate("chr1", 97450065),
            ),
            RsIdInfo(
                "rs1801265", "G", "A",
                GeneCoordinate("1", 98348885), GeneCoordinate("chr1", 97883329),
            ),
        }),
        frozenset({
            DrugInfo("5-Fluorouracil", "https://www.source_url.org/5-Fluorouracil"),
            DrugInfo("Capecitabine", "https://www.source_url.org/Capecitabine"),
        }),
        {
            "rs72549303": Annotation("6744CA>GA", "6744GA>CA"),
            "rs1801265": Annotation("85C>T", "85T>C"),
        },
    )

    # --- FAKE ---
    fake_annotations: Dict[str, Annotation] = {}
    expected_fake = GeneInfo(
        "FAKE",
        "*1",
        frozenset({
            Haplotype("*4A", "Reduced Function", frozenset({Variant("rs1212125", "C")})),
        }),
        frozenset({
            RsIdInfo(
                "rs1212125", "T", "T",
                GeneCoordinate("5", 97915617), GeneCoordinate("chr5", 97450060),
            ),
        }),
        frozenset({DrugInfo("Aspirin", "https://www.source_url.org/Aspirin")}),
        fake_annotations,
    )

    # --- FAKE2 ---
    expected_fake2 = GeneInfo(
        "FAKE2",
        "*1",
        frozenset({
            Haplotype("*4A", "Reduced Function", frozenset({Variant("rs1212127", "C")})),
        }),
        frozenset({
            RsIdInfo(
                "rs1212127", "C", "T",
                GeneCoordinate("16", 97915617), GeneCoordinate("chr16", 97450060),
            ),
        }),
        frozenset({DrugInfo("Aspirin", "https://www.source_url.org/Aspirin")}),
        {"rs1212127": Annotation("1324C>T", "1324T>C")},
    )

    expected_panel = Panel(
        "fake_panel",
        "0.3",
        frozenset({expected_dpyd, expected_fake, expected_fake2}),
    )
    self.assertEqual(expected_panel, loaded_panel)