def test_TwoRecords_CorrectRegions(self):
        """
        Two records separated by one base give five regions for the chrom
        """
        # base sequence:      T TAT    C G   G
        # derived sequence:   T GCCAC  C TTT G
        records = [
            _MockVcfRecord(pos=2, ref="TAT", alts=["GCCAC"]),
            _MockVcfRecord(pos=6, ref="G", alts=["TTT"]),
        ]
        mapper = SeqRegionMapper(records, {"JAC": 7})
        region_map = mapper.get_map()

        # Regions without REF/ALT sit before, between and after the records.
        before = SeqRegion(base_ref_start=1, pers_ref_start=1, length=1)
        first_var = SeqRegion(
            base_ref_start=2,
            pers_ref_start=2,
            length=5,
            vcf_record_ref="TAT",
            vcf_record_alt="GCCAC",
        )
        between = SeqRegion(base_ref_start=5, pers_ref_start=7, length=1)
        second_var = SeqRegion(
            base_ref_start=6,
            pers_ref_start=8,
            length=3,
            vcf_record_ref="G",
            vcf_record_alt="TTT",
        )
        after = SeqRegion(base_ref_start=7, pers_ref_start=11, length=1)

        expected_regions = [before, first_var, between, second_var, after]
        self.assertEqual(expected_regions, region_map["JAC"])
    def test_base_ref_further_than_pers_ref(self):
        """
        One position bisects to different indices per coordinate target
        """
        regions = [
            SeqRegion(base_ref_start=1, pers_ref_start=1, length=1),
            SeqRegion(
                base_ref_start=2,
                pers_ref_start=2,
                length=5,
                vcf_record_ref="TAT",
                vcf_record_alt="GCCAC",
            ),
            SeqRegion(
                base_ref_start=5,
                pers_ref_start=7,
                length=3,
                vcf_record_ref="G",
                vcf_record_alt="TTT",
            ),
        ]

        record = _MockVcfRecord(pos=6, ref="T", alts=["A"])
        region_search = SearchableSeqRegionsMap({"JAC": regions})

        # Position 6 is expected at index 1 when searching PERS_REF starts
        # and at index 2 when searching BASE_REF starts.
        expected_index_by_target = (
            (BisectTarget.PERS_REF, 1),
            (BisectTarget.BASE_REF, 2),
        )
        for target, expected_index in expected_index_by_target:
            found_index = region_search.bisect("JAC", record.pos, target)
            self.assertEqual(expected_index, found_index)
    def test_retrieve_searched_region(self):
        """
        The index returned by bisect retrieves the expected region per chrom
        """
        chr1_regions = [
            SeqRegion(base_ref_start=1, pers_ref_start=1, length=1),
            SeqRegion(
                base_ref_start=2,
                pers_ref_start=2,
                length=5,
                vcf_record_ref="TAT",
                vcf_record_alt="GCCAC",
            ),
        ]
        chr2_regions = [
            SeqRegion(base_ref_start=1, pers_ref_start=1, length=200)
        ]
        regions_by_chrom = {"chr1": chr1_regions, "chr2": chr2_regions}
        searcher = SearchableSeqRegionsMap(regions_by_chrom)

        # Position 100 on chr2: expect that chromosome's only region.
        record = _MockVcfRecord(pos=100, ref="T", alts=["A"], chrom="chr2")
        region_index = searcher.bisect(record.chrom, record.pos,
                                       BisectTarget.PERS_REF)
        found_region = searcher.get_region(record.chrom, region_index)
        self.assertEqual(found_region, chr2_regions[0])

        # Position 4 on chr1: expect the second region of that chromosome.
        record = _MockVcfRecord(pos=4, ref="C", alts=["A"], chrom="chr1")
        region_index = searcher.bisect(record.chrom, record.pos,
                                       BisectTarget.PERS_REF)
        found_region = searcher.get_region(record.chrom, region_index)
        self.assertEqual(found_region, chr1_regions[1])
    def test_base_ref_pers_ref_same_results(self):
        """
        Every bisect target gives the same index when both starts coincide
        """
        regions = [
            SeqRegion(base_ref_start=1, pers_ref_start=1, length=1),
            SeqRegion(
                base_ref_start=2,
                pers_ref_start=2,
                length=3,
                vcf_record_ref="TAT",
                vcf_record_alt="GCC",
            ),
            SeqRegion(base_ref_start=5, pers_ref_start=5, length=3),
        ]

        record_in_var = _MockVcfRecord(pos=2, ref="GC", alts=["GA"])
        record_in_nonvar = _MockVcfRecord(pos=1, ref="A", alts=["T"])
        region_search = SearchableSeqRegionsMap({"JAC": regions})

        # Each record is paired with the region index it should land in;
        # all targets are checked for the first record before the second.
        expected_index_by_record = (
            (record_in_var, 1),
            (record_in_nonvar, 0),
        )
        for record, expected_index in expected_index_by_record:
            for target in BisectTarget:
                found_index = region_search.bisect("JAC", record.pos, target)
                self.assertEqual(expected_index, found_index)
    def test_chrom_with_no_records(self):
        """
        Need to map chroms with no initial variation too

        Chrom_1 has no record and is expected as a single region of its
        full size; Chrom_2 has one record and is expected as three regions.
        """
        only_record = _MockVcfRecord(pos=2,
                                     ref="T",
                                     alts=["A"],
                                     chrom="Chrom_2")
        sizes = {"Chrom_1": 4, "Chrom_2": 5}
        region_map = SeqRegionMapper([only_record], sizes).get_map()

        expectations = {
            "Chrom_1": [
                SeqRegion(base_ref_start=1, pers_ref_start=1, length=4)
            ],
            "Chrom_2": [
                SeqRegion(base_ref_start=1, pers_ref_start=1, length=1),
                SeqRegion(
                    base_ref_start=2,
                    pers_ref_start=2,
                    length=1,
                    vcf_record_ref="T",
                    vcf_record_alt="A",
                ),
                SeqRegion(base_ref_start=3, pers_ref_start=3, length=3),
            ],
        }
        for chrom, expected_regions in expectations.items():
            self.assertEqual(expected_regions, region_map[chrom])
 @classmethod
 def setUpClass(cls) -> None:
     """
     Build the two regions stored on the class as `mapped_regions`

     unittest calls `setUpClass()` on the class itself, so the method has
     to be a classmethod to receive `cls`.
     """
     cls.mapped_regions = [
         # Region without REF/ALT sequences.
         SeqRegion(base_ref_start=1, pers_ref_start=1, length=1),
         # Region carrying the REF and ALT of a record.
         SeqRegion(
             base_ref_start=2,
             pers_ref_start=2,
             length=2,
             vcf_record_ref="TAT",
             vcf_record_alt="CC",
         ),
     ]
    def test_json_deserialise_SeqRegion(self):
        """
        SeqRegion.from_json rebuilds each region held in self.mapped_regions
        """
        without_sequences = {
            "SeqRegion": {
                "base_ref_start": 1,
                "pers_ref_start": 1,
                "length": 1
            }
        }
        with_sequences = {
            "SeqRegion": {
                "base_ref_start": 2,
                "pers_ref_start": 2,
                "length": 2,
                "vcf_record_ref": "TAT",
                "vcf_record_alt": "CC",
            }
        }

        # Payloads are listed in the same order as self.mapped_regions.
        payloads = (without_sequences, with_sequences)
        for index, payload in enumerate(payloads):
            self.assertEqual(self.mapped_regions[index],
                             SeqRegion.from_json(payload))
 def test_ref_call_produces_invariant_region_only(self):
     """
     A record whose sample has GT [0] leaves the chrom as a single region
     """
     # base sequence:      T TAT CGG
     # derived sequence:   ^^^^^^^^^
     ref_call_sample = {"GT": [0]}
     ref_call = _MockVcfRecord(pos=2,
                               ref="TAT",
                               alts=["G"],
                               samples=[ref_call_sample])
     region_map = SeqRegionMapper([ref_call], {"JAC": 7}).get_map()

     # One region without REF/ALT, as long as the chromosome size.
     whole_chrom = SeqRegion(base_ref_start=1, pers_ref_start=1, length=7)
     self.assertEqual([whole_chrom], region_map["JAC"])
    def test_SingleBaseAlt_CorrectRegion(self):
        """
        A three-base REF replaced by a one-base ALT gives three regions
        """
        # base sequence:      T TAT CGG
        # derived sequence:   T G   CGG
        record = _MockVcfRecord(pos=2, ref="TAT", alts=["G"])
        region_map = SeqRegionMapper([record], {"JAC": 7}).get_map()

        before = SeqRegion(base_ref_start=1, pers_ref_start=1, length=1)
        from_record = SeqRegion(
            base_ref_start=2,
            pers_ref_start=2,
            length=1,
            vcf_record_ref="TAT",
            vcf_record_alt="G",
        )
        # The ALT is two bases shorter than the REF, so the last region
        # starts at 5 in base_ref coordinates but 3 in pers_ref coordinates.
        after = SeqRegion(base_ref_start=5, pers_ref_start=3, length=3)

        self.assertEqual([before, from_record, after], region_map["JAC"])
    def test_ThreeAdjacentRecords_CorrectRegions(self):
        """
        Three back-to-back records give three consecutive record regions

        The expected map has one region before the records, one region per
        record with no region in between, and one region after them.
        """
        # base sequence:      T TAT    C   G  G
        # derived sequence:   T GCCAC  TCT AA G
        base_records = [
            _MockVcfRecord(pos=2, ref="TAT", alts=["GCCAC"]),
            _MockVcfRecord(pos=5, ref="C", alts=["TCT"]),
            _MockVcfRecord(pos=6, ref="G", alts=["AA"]),
        ]
        chrom_sizes = {"JAC": 7}
        result = SeqRegionMapper(base_records, chrom_sizes).get_map()

        expected = [
            SeqRegion(base_ref_start=1, pers_ref_start=1, length=1),
            SeqRegion(
                base_ref_start=2,
                pers_ref_start=2,
                length=5,
                vcf_record_ref="TAT",
                vcf_record_alt="GCCAC",
            ),
            SeqRegion(
                base_ref_start=5,
                pers_ref_start=7,
                length=3,
                vcf_record_ref="C",
                vcf_record_alt="TCT",
            ),
            SeqRegion(
                base_ref_start=6,
                pers_ref_start=10,
                length=2,
                vcf_record_ref="G",
                vcf_record_alt="AA",
            ),
            SeqRegion(base_ref_start=7, pers_ref_start=12, length=1),
        ]

        # Look the chromosome up by name rather than taking whichever value
        # comes first, so the key of the map is checked as well.
        self.assertEqual(expected, result["JAC"])
    def test_dump_and_load_without_sequences(self):
        """
        Serialisation without the REF and ALT SeqRegion sequences

        The map is dumped with dump_sequences=False and loaded back. The
        first region must be equal before and after; the second must equal
        SeqRegion(2, 2, 2), which has no REF/ALT sequences.
        """
        searcher = SearchableSeqRegionsMap({"JAC": self.mapped_regions})
        tmpdir = Path(mkdtemp())
        # Registered straight away so the directory is removed even when the
        # dump, the load or an assertion below fails; a trailing rmtree call
        # would be skipped in those cases.
        self.addCleanup(rmtree, tmpdir)
        tmpfile = tmpdir / "map.json"
        searcher.dump_to(tmpfile, dump_sequences=False)
        loaded_searcher = SearchableSeqRegionsMap.load_from(tmpfile)

        self.assertEqual(searcher.get_region("JAC", 0),
                         loaded_searcher.get_region("JAC", 0))
        self.assertEqual(SeqRegion(2, 2, 2),
                         loaded_searcher.get_region("JAC", 1))
    def test_TwoRecords_TwoDifferentChroms(self):
        """
        Records on different chroms are mapped under their own chrom key
        """
        shrinking_record = _MockVcfRecord(pos=4,
                                          ref="ATTC",
                                          alts=["A"],
                                          chrom="Chrom_1")
        growing_record = _MockVcfRecord(pos=6,
                                        ref="A",
                                        alts=["AAC"],
                                        chrom="Chrom_2")
        sizes = {"Chrom_1": 10, "Chrom_2": 8}
        mapper = SeqRegionMapper([shrinking_record, growing_record], sizes)
        region_map = mapper.get_map()

        expectations = {
            # ALT shorter than REF: the trailing region starts earlier in
            # pers_ref coordinates (5) than in base_ref coordinates (8).
            "Chrom_1": [
                SeqRegion(base_ref_start=1, pers_ref_start=1, length=3),
                SeqRegion(
                    base_ref_start=4,
                    pers_ref_start=4,
                    length=1,
                    vcf_record_ref="ATTC",
                    vcf_record_alt="A",
                ),
                SeqRegion(base_ref_start=8, pers_ref_start=5, length=3),
            ],
            # ALT longer than REF: the trailing region starts later in
            # pers_ref coordinates (9) than in base_ref coordinates (7).
            "Chrom_2": [
                SeqRegion(base_ref_start=1, pers_ref_start=1, length=5),
                SeqRegion(
                    base_ref_start=6,
                    pers_ref_start=6,
                    length=3,
                    vcf_record_ref="A",
                    vcf_record_alt="AAC",
                ),
                SeqRegion(base_ref_start=7, pers_ref_start=9, length=2),
            ],
        }
        for chrom, expected_regions in expectations.items():
            self.assertEqual(expected_regions, region_map[chrom])