Ejemplo n.º 1
0
class TestGenomicRegionSet(unittest.TestCase):
    
    def region_sets(self,listA,listB):
        """ Setting two GenomicRegionSets as self.setA and self.setB for each case test. """
        self.setA = GenomicRegionSet('for Unit Test')
        for i in range(len(listA)):
            self.setA.add(GenomicRegion(chrom=listA[i][0], initial=listA[i][1], final=listA[i][2]))
        
        self.setB = GenomicRegionSet('for Unit Test')
        for i in range(len(listB)):
            self.setB.add(GenomicRegion(chrom=listB[i][0], initial=listB[i][1], final=listB[i][2]))
    
    def test_extend(self):
        """
        Two empty sets
        A : none 
        R : none
        """
        self.region_sets([],
                         [])
        self.setA.extend(100,100)
        self.assertEqual(len(self.setA.sequences), 0)
        """
        One region
        A :   -----
        R : ---------
        """
        self.region_sets([['chr1',5,10]],
                         [])
        result = self.setA
        result.extend(4,4)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 14)
        """
        Many region
        A :   -----   ------         -----    -----
        R : --------=---------     ------------------
        """
        self.region_sets([['chr1',5,10],['chr1',15,20],['chr1',40,50],['chr1',65,75]],
                         [])
        result = self.setA
        result.extend(5,5)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 0)
        self.assertEqual(result[0].final, 15)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 25)
        self.assertEqual(result[2].initial, 35)
        self.assertEqual(result[2].final, 55)
        self.assertEqual(result[3].initial, 60)
        self.assertEqual(result[3].final, 80)
        """
        Many region in different chromosome
        A :   -----   ------         -----    -----
        R : none
        """
        self.region_sets([['chr1',5,10],['chr2',15,20],['chr3',40,50],['chr4',65,75]],
                         [])
        result = self.setA
        result.extend(5,5)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 0)
        self.assertEqual(result[0].final, 15)
        self.assertEqual(result[0].chrom, 'chr1')
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 25)
        self.assertEqual(result[1].chrom, 'chr2')
        self.assertEqual(result[2].initial, 35)
        self.assertEqual(result[2].final, 55)
        self.assertEqual(result[2].chrom, 'chr3')
        self.assertEqual(result[3].initial, 60)
        self.assertEqual(result[3].final, 80)
        self.assertEqual(result[3].chrom, 'chr4')
        """
        One region
        A :   -----
        R : ---------
        """
        self.region_sets([['chr1',100,200]],
                         [])
        result = self.setA
        result.extend(10,10,percentage=True)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 90)
        self.assertEqual(result[0].final, 210)
        
    def test_sort(self):
        self.region_sets([['chr1',15,20],['chr1',40,50],['chr1',65,75],['chr1',5,10]],
                         [])
        self.setA.sort()
    
    def test_intersect(self):
        """
        Two empty sets
        A : none 
        B : none
        R : none
        """
        self.region_sets([],
                         [])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        One empty set
        A :   -----
        B : none
        R : none
        """
        self.region_sets([['chr1',5,10]],
                         [])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        A : none
        B :   -----
        R : none
        """
        self.region_sets([],
                         [['chr1',5,10]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        No overlapping
        A : ------      ---------               ------- 
        B :        ----          ------  ------   
        R : none
        """
        self.region_sets([['chr1',1,5],['chr1',11,20],['chr1',33,38]],
                         [['chr1',7,9],['chr1',20,25],['chr1',26,31]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        End-to-end attach
        A : ------      ------
        B :       ------
        R : none
        """
        self.region_sets([['chr1',1,5],['chr1',11,20]],
                         [['chr1',5,11]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        No length attach
        A : .      .
        B :    .   .
        R : none
        """
        self.region_sets([['chr1',2,2],['chr1',20,20]],
                         [['chr1',5,5],['chr1',20,20]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        
        """
        Perfect overlapping
        A : ------
        B : ------
        R : ------
        """
        self.region_sets([['chr1',1,10],['chr1',500,550],['chr1',600,650],['chr1',700,750],['chr1',725,800]],
                         [['chr1',1,10],['chr1',500,550],['chr1',600,650],['chr1',700,750],['chr1',725,800]])
        result = self.setA.intersect(self.setB, mode=OverlapType.OVERLAP, rm_duplicates=True)
        
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 5)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 5)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        """
        One overlapping region
        A : ------
        B :     --------
        R1:     --       (overlap)
        R2: ------       (original)
        R3:              (comp_incl)
        """

        self.region_sets([['chr1',1,10]],
                         [['chr1',7,20]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 7)
        self.assertEqual(result[0].final, 10)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Two simple overlapping regions
        A : -------      --------
        B :     -------------
        R1:     ---      ----     (overlap)
        R2: -------      -------- (original)
        R3:                       (comp_incl)
        """
        self.region_sets([['chr1',1,10],['chr1',26,35]],
                         [['chr1',7,30]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 7)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 26)
        self.assertEqual(result[1].final, 30)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 26)
        self.assertEqual(result[1].final, 35)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Two separately overlapping regions 
        A : -------      --------
        B :     -----        --------
        R1:     ---          ----     (overlap)
        R2: -------      --------     (original)
        R3:                           (comp_incl)
        """
        self.region_sets([['chr1',1,10],['chr1',26,35]],
                         [['chr1',7,15],['chr1',30,40]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 7)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 30)
        self.assertEqual(result[1].final, 35)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 26)
        self.assertEqual(result[1].final, 35)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Many various overlapping (mixed)
        A :   ------------------            --------   ---------
        B : ----   -------    ------            ----------      
        R1:   --   -------    --                ----   ---       (overlap)
        R2:   ------------------            --------   --------- (original)
        R3:                                                      (comp_incl) 
        """

        self.region_sets([['chr1',3,30],['chr1',50,60],['chr1',70,85]],
                         [['chr1',1,5],['chr1',10,19],['chr1',27,35],['chr1',55,75]])

        result = self.setA.intersect(self.setB, mode=OverlapType.OVERLAP)
        self.assertEqual(len(result), 5)
        self.assertEqual(result[0].initial, 3)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 27)
        self.assertEqual(result[2].final, 30)
        self.assertEqual(result[3].initial, 55)
        self.assertEqual(result[3].final, 60)
        self.assertEqual(result[4].initial, 70)
        self.assertEqual(result[4].final, 75)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 3)
        self.assertEqual(result[0].final, 30)
        self.assertEqual(result[1].initial, 50)
        self.assertEqual(result[1].final, 60)
        self.assertEqual(result[2].initial, 70)
        self.assertEqual(result[2].final, 85)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Different chromosomes
        A : chr1  -------
        B : chr2  -------
        R : none
        """
        self.region_sets([['chr1',1,10]],
                         [['chr2',1,10]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Completely included overlapping
        A : ---------------------------
        B : ----    ------       -----------
        R1: ----    ------       ------      (overlap)
        R2: ---------------------------      (original)
        R3:                                  (comp_incl)
        """
        self.region_sets([['chr1',1,50]],
                         [['chr1',1,5],['chr1',10,19],['chr1',45,60]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 45)
        self.assertEqual(result[2].final, 50)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 50)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        A : ----    ------       -----------
        B : ---------------------------
        R1: ----    ------       ------      (overlap)
        R2: ----    ------       ----------- (original)
        R3: ----    ------                   (comp_incl)
        """

        self.region_sets([['chr1',1,5],['chr1',10,19],['chr1',45,60]],
                         [['chr1',1,50]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 45)
        self.assertEqual(result[2].final, 50)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 45)
        self.assertEqual(result[2].final, 60)
        
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)

        """
        A : --------------         -------
                ------
        B :       -----          ----------------
        R1:       -----            -------      (overlap)
                  ----
        R2: --------------         -------      (original)
                ------
        R3:                        -------      (comp_incl)
        """
        self.region_sets([['chr1',1,50],['chr1',20,40],['chr1',70,80]],
                         [['chr1',25,45],['chr1',65,95]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 25)
        self.assertEqual(result[0].final, 45)
        self.assertEqual(result[1].initial, 70)
        self.assertEqual(result[1].final, 80)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[1].initial, 20)
        self.assertEqual(result[1].final, 40)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 50)
        self.assertEqual(result[2].initial, 70)
        self.assertEqual(result[2].final, 80)
        
        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 70)
        self.assertEqual(result[0].final, 80)

    def test_closest(self):
        """
        Two empty sets
        A : none 
        B : none
        R : none
        """
        self.region_sets([],
                         [])
        result = self.setA.closest(self.setB)
        self.assertEqual(len(result), 0)
        # """
        # One empty set
        # A :   -----
        # B : none
        # R : none
        # """
        # self.region_sets([['chr1',5,10]],
        #                  [])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 0)
        # """
        # A : none
        # B :   -----
        # R : none
        # """
        # self.region_sets([],
        #                  [['chr1',5,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 0)
        # """
        # Overlapping within set
        # A : -----====-----
        # B :      ----
        # R :      ----
        # """
        # self.region_sets([['chr1',1,10],['chr1',6,15]],
        #                  [['chr1',6,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 2)
        # """
        # A :      ----
        # B : -----====-----
        # R : -----====-----
        # """
        # self.region_sets([['chr1',6,10]],
        #                  [['chr1',1,10],['chr1',6,15]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 1)
        # """
        # No overlapping
        # A : ------      ---------               -------
        # B :        ----          ------  ------
        # R :                      ------
        # """
        # self.region_sets([['chr1',1,5],['chr1',11,20],['chr1',33,38]],
        #                  [['chr1',7,9],['chr1',20,25],['chr1',26,31]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 3)
        # # self.assertEqual(result[0].initial, 20)
        # # self.assertEqual(result[0].final, 25)
        # """
        # End-to-end attach
        # A : ------      ------
        # B :       ------
        # R :       ------
        # """
        # self.region_sets([['chr1',1,5],['chr1',11,20]],
        #                  [['chr1',5,11]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 2)
        # # self.assertEqual(result[0].initial, 5)
        # # self.assertEqual(result[0].final, 11)
        # """
        # Perfect overlapping
        # A : ------
        # B : ------
        # R : ------
        # """
        # self.region_sets([['chr1',1,10]],
        #                  [['chr1',1,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 1)
        # self.assertEqual(result[0].initial, 1)
        # self.assertEqual(result[0].final, 10)
        # """
        # One overlapping region
        # A : ------
        # B :     --------
        # R :     --------
        # """
        # self.region_sets([['chr1',1,10]],
        #                  [['chr1',7,20]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(result[0].initial, 7)
        # self.assertEqual(result[0].final, 20)
        # """
        # Two simple overlapping regions
        # A : -------      --------
        # B :     -------------
        # R :     -------------
        # """
        # self.region_sets([['chr1',1,10],['chr1',26,35]],
        #                  [['chr1',7,30]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(result[0].initial, 7)
        # self.assertEqual(result[0].final, 30)
        # """
        # Two separately overlapping regions
        # A : -------      --------
        # B :     -----        --------
        # R : none
        # """
        # self.region_sets([['chr1',1,10],['chr1',26,35]],
        #                  [['chr1',7,15],['chr1',30,40]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 2)
        # """
        # Many various overlapping (mixed)
        # A :   ------------------            --------   ---------
        # B : ----   -------    ------            ----------
        # R : none
        # """
        # self.region_sets([['chr1',3,30],['chr1',50,60],['chr1',70,85]],
        #                  [['chr1',1,5],['chr1',10,19],['chr1',27,35],['chr1',55,75]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 4)
        # """
        # Different chromosomes
        # A : chr1  -------
        # B : chr2  -------
        # R : chr2  -------
        #
        # """
        # self.region_sets([['chr1',1,10]],
        #                  [['chr2',1,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 0)
        # """
        # Completely included overlapping
        # A : ---------------------------
        # B : ----    ------       -----------
        # R : ----    ------       -----------
        # """
        # self.region_sets([['chr1',1,50]],
        #                  [['chr1',1,5],['chr1',10,19],['chr1',45,60]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 3)
        # """
        # A : ----    ------       -----------
        # B : ---------------------------
        # R : none
        # """
        # self.region_sets([['chr1',1,5],['chr1',10,19],['chr1',45,60]],
        #                  [['chr1',1,50]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(result, False)
        # """
        # A : ----         ------                  ---
        # B :        ---              -----
        # R :        ---
        # """
        # self.region_sets([['chr1',1,5],['chr1',27,45],['chr1',85,95]],
        #                  [['chr1',15,20],['chr1',55,65]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 1)
        # self.assertEqual(result[0].initial, 15)
        # self.assertEqual(result[0].final, 20)
    
    def test_remove_duplicates(self):
        """
        A : ===== -----
        R : ----- -----
        """
        self.region_sets([['chr1',1,10],['chr1',1,10],['chr1',15,25]],
                         [])
        self.setA.remove_duplicates()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        """
        A : =====--- -----
        R : =====--- -----
        """
        self.region_sets([['chr1',1,10],['chr1',1,15],['chr1',20,25]],
                         [])
        self.setA.remove_duplicates()
        result = self.setA
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 1)
        self.assertEqual(result[1].final, 15)
        self.assertEqual(result[2].initial, 20)
        self.assertEqual(result[2].final, 25)
        """
        A : ===== ----- ------  ====
        R : ----- ----- ------  ----
        """
        self.region_sets([['chr1',1,10],['chr1',1,10],['chr1',15,25],['chr1',30,35],['chr1',40,45],['chr1',40,45]],
                         [])
        self.setA.remove_duplicates()
        result = self.setA
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        self.assertEqual(result[2].initial, 30)
        self.assertEqual(result[2].final, 35)
        self.assertEqual(result[3].initial, 40)
        self.assertEqual(result[3].final, 45)

    def test_window(self):
        """
        A :             -------
        B : ------[ 99 ]       [   199   ]---
        window = 100
        R :       -                           only one base overlaps with extending A
        """   
        self.region_sets([['chr1',200,300]],
                         [['chr1',1,101],['chr1',499,550]])
        result = self.setA.window(self.setB,adding_length=100)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 100)
        self.assertEqual(result[0].final, 101)
        """
        A :             -------
        B : ------[ 99 ]       [   199   ]---
        window = 200
        R : ------                        -   
        left-hand side is covered, and the right-hand side is only one base overlapped
        """   
        self.region_sets([['chr1',200,300]],
                         [['chr1',1,101],['chr1',499,550]])
        result = self.setA.window(self.setB,adding_length=200)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)  # GenomicRegion.extend will choose 1 rather than 0
        self.assertEqual(result[0].final, 101)
        self.assertEqual(result[1].initial, 499)
        self.assertEqual(result[1].final, 500)
        """
        A :                         ----    ----
        B :             --------                    ----
        window = 1000 (default)
        R :                 ----                    ----
        """   
        self.region_sets([['chr1',3000,3500],['chr1',4000,4500]],
                         [['chr1',1500,2500],['chr1',5000,5500]])
        result = self.setA.window(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 2000)
        self.assertEqual(result[0].final, 2500)
        self.assertEqual(result[1].initial, 5000)
        self.assertEqual(result[1].final, 5500)
        """
        A :                         ----    ----
        B :             --------                    ----
        window = 2000
        R :             --------                    ----
                            ----                    ----
        window = 100
        R : none
        """   
        self.region_sets([['chr1',3000,3500],['chr1',4000,4500]],
                         [['chr1',1500,2500],['chr1',5000,5500]])
        result = self.setA.window(self.setB,adding_length=2000)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1500)
        self.assertEqual(result[0].final, 2500)
        self.assertEqual(result[1].initial, 5000)
        self.assertEqual(result[1].final, 5500)
        result = self.setA.window(self.setB,adding_length=100)
        self.assertEqual(len(result), 0)
        
    def test_subtract(self):
        """
        A : none
        B :    ------
        R : none
        """
        self.region_sets([],
                         [['chr1',6,15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 0)
        """
        A :    ------
        B : none
        R :    ------
        """
        self.region_sets([['chr1',6,15]],
                         [])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 6)
        self.assertEqual(result[0].final, 15)
        """
        A : ------
        B :    ------
        R : ---
        """
        self.region_sets([['chr1',1,10]],
                         [['chr1',6,15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 6)
        """
        A :    ------
        B : ------
        R :       ---
        """
        self.region_sets([['chr1',6,15]],
                         [['chr1',1,10]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 10)
        self.assertEqual(result[0].final, 15)
        """
        A :    ---
        B : ---------
        R : none
        """
        self.region_sets([['chr1',6,10]],
                         [['chr1',1,15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 0)
        """
        A : ---------
        B :    ---
        R : ---   ---
        """
        self.region_sets([['chr1',1,15]],
                         [['chr1',6,10]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 6)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 15)
        """
        A :    ------
        B :    ------
        R : none
        """
        self.region_sets([['chr1',6,15]],
                         [['chr1',6,15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 0)
        """
        A :   ----------              ------
        B :          ----------                    ----
        R :   -------                 ------
        """
        self.region_sets([['chr1',5,30],['chr1',70,85]],
                         [['chr1',20,50],['chr1',100,110]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 5)
        self.assertEqual(result[0].final, 20)
        self.assertEqual(result[1].initial, 70)
        self.assertEqual(result[1].final, 85)
        """
        A :        ------   -----
        B :    ------
        R :          ----   -----
        """
        self.region_sets([['chr1',20,30],['chr1',35,55]],
                         [['chr1',10,23],['chr1',100,110]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 23)
        self.assertEqual(result[0].final, 30)
        self.assertEqual(result[1].initial, 35)
        self.assertEqual(result[1].final, 55)
        """
        A :   ch1     ---------------------
              ch2     -------------------------
        B :   ch1             ------
              ch2                        ------
        R :   ch1     --------      -------
              ch2     -------------------
        """
        self.region_sets([['chr1',0,30000],['chr2',0,35000]],
                         [['chr1',20000,23000],['chr2',31000,35000]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 0)
        self.assertEqual(result[0].final, 20000)
        self.assertEqual(result[1].initial, 23000)
        self.assertEqual(result[1].final, 30000)
        self.assertEqual(result[2].initial, 0)
        self.assertEqual(result[2].final, 31000)
        """
        A :   -----------------------------------------------------------
        B :    ---    ---------         ----           ----
        R :   -   ----         ---------    -----------    --------------
        """
        self.region_sets([['chr1',5,1000]],
                         [['chr1',10,15],['chr1',30,70],['chr1',120,140],['chr1',200,240]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 5)
        
        """
        A :   -----------------------              ------
                   -----     -----  -----------
        B :    ---    ---------         ----           ----
        R :   -   ----         ------              ----
                   ---         ---  ----    ---
        """
        self.region_sets([['chr1',5,100],['chr1',20,40],['chr1',60,80],['chr1',95,150],['chr1',180,220]],
                         [['chr1',10,15],['chr1',30,70],['chr1',120,140],['chr1',200,240]])
        result = self.setA.subtract(self.setB)
        #print(result.sequences)
        self.assertEqual(len(result), 8)
        self.assertEqual(result[0].initial, 5)
        """
        A :   -----------------------------------------------------------
        B :    ---    ---------         ----           ----
        R :   -   ----         ---------    -----------    --------------
        """
        self.region_sets([['chr1',5,1000],['chr2',5,1000],['chr4',5,1000]],
                         [['chr1',10,15],['chr1',30,70],['chr1',120,140],['chr1',200,240],
                          ['chr2',10,15],['chr2',30,70],['chr2',120,140],['chr2',200,240],
                          ['chr4',10,15],['chr4',30,70],['chr4',120,140],['chr4',200,240]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 15)
        
        
    def test_merge(self):
        """
        A : none
        R : none
        """
        self.region_sets([],
                         [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 0)
        """
        A : -----  -----
        R : -----  -----
        """
        self.region_sets([['chr1',1,10],['chr1',15,25]],
                         [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        """
        A1: ------------   ----
        A2:    -----
        R : ------------   ----
        """
        self.region_sets([['chr1',1,30],['chr1',11,20],['chr1',40,50]],
                         [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 30)
        self.assertEqual(result[1].initial, 40)
        self.assertEqual(result[1].final, 50)
        """
        A1: --------       ----
        A2:    ---------
        R : ------------   ----
        """
        self.region_sets([['chr1',1,30],['chr1',20,40],['chr1',50,60]],
                         [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 40)
        self.assertEqual(result[1].initial, 50)
        self.assertEqual(result[1].final, 60)
        """
        A : =======
        R : -------
        """
        self.region_sets([['chr1',1,30],['chr1',1,30]],
                         [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 30)
        
    def test_cluster(self):
        """
        Empty sets
        A : none 
        R : none
        """
        self.region_sets([],
                         [])
        result = self.setA.cluster(10)
        self.assertEqual(len(result), 0)
        """
        A :  ------- 
        R :  -------
        """
        self.region_sets([['chr1',1,10]],
                         [])
        result = self.setA.cluster(10)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        """
        A :  -----
                  ------
        R :  -----------
        """
        self.region_sets([['chr1',1,10],['chr1',10,20]],
                         [])
        result = self.setA.cluster(10)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 20)
        """
        A :  -----  -----
        R1:  -----  -----
        R2:  ------------
        """
        self.region_sets([['chr1',1,10],['chr1',15,25]],
                         [])
        result = self.setA.cluster(1)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        result = self.setA.cluster(5)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        result = self.setA.cluster(6)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 25)
        """
        A :  ---- ----  ----   ----    ----
        R1:  ---------  ----   ----    ----
        R2:  ---------------   ----    ----
        R3:  ----------------------    ----
        R4:  ------------------------------
        R5:  ------------------------------
        """
        self.region_sets([['chr1',1,10],['chr1',15,25],['chr1',35,45],
                          ['chr1',60,70],['chr1',90,100]],
                         [])
        result = self.setA.cluster(6)
        self.assertEqual(len(result), 4)
        result = self.setA.cluster(11)
        self.assertEqual(len(result), 3)
        result = self.setA.cluster(16)
        self.assertEqual(len(result), 2)
        result = self.setA.cluster(21)
        self.assertEqual(len(result), 1)
        result = self.setA.cluster(26)
        self.assertEqual(len(result), 1)
        
    def test_flank(self):
        """
        A :        -----
        R1:     ---     ---
        """
        self.region_sets([['chr1',60,75]],
                         [])
        result = self.setA.flank(10)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 50)
        self.assertEqual(result[0].final, 60)
        self.assertEqual(result[1].initial, 75)
        self.assertEqual(result[1].final, 85)
        """
        A :        -----     ----
        R1:   -----     =====    ----
        """
        self.region_sets([['chr1',60,75],['chr1',90,100]],
                         [])
        result = self.setA.flank(15)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 45)
        self.assertEqual(result[0].final, 60)
        self.assertEqual(result[1].initial, 75)
        self.assertEqual(result[1].final, 90)
        self.assertEqual(result[2].initial, 75)
        self.assertEqual(result[2].final, 90)
        self.assertEqual(result[3].initial, 100)
        self.assertEqual(result[3].final, 115)
        
    def test_jaccard(self):
        """
        self           --8--      ---10---      -4-
        y         ---10---             ---10---
        intersect      -5-             -4-    
        similarity:   ( 5 + 4 )/[(8 + 10 + 4) + (10 +10) - (5 + 4 )]
                      = 9/33
        """
        self.region_sets([['chr1',50,58],['chr1',70,80],['chr1',90,94]],
                         [['chr1',45,55],['chr1',76,86]])
        result = self.setA.jaccard(self.setB)
        self.assertEqual(result, 9/33)
    
    def test_get_genome_data(self):
        """hg19"""
        result = GenomicRegionSet("hg19")
        result.get_genome_data(organism="hg19")
        self.assertEqual(len(result), 23)
        """hg19, with Mitochondria chromosome"""
        result = GenomicRegionSet("hg19")
        result.get_genome_data(organism="hg19",chrom_M=True)
        self.assertEqual(len(result), 24)
        
    def test_random_regions(self):
        
        self.region_sets([['chr1',0,10000],['chr2',0,20000],['chrX',0,30000]],
                         [])
        result = self.setA.random_regions(organism="mm9", 
                                          total_size=100, 
                                          overlap_result=False, 
                                          overlap_input=False)
        result.sort()
        #print("-"*80)
        #print("The result random regions are: ")
        #for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        #print("Overlaps within result: ",result.within_overlap())
        
        
        self.region_sets([['chr1',0,10000],['chr2',0,20000],['chrX',0,30000]],
                         [])
        result = self.setA.random_regions(organism="mm9", 
                                          total_size=100, 
                                          overlap_result=True, 
                                          overlap_input=False)
        result.sort()
        #print("-"*80)
        #print("The result random regions are: ")
        #for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        #print("Overlaps within result: ",result.within_overlap())
        
        self.region_sets([['chr1',0,10000],['chr2',0,20000],['chrX',0,30000]],
                         [])
        result = self.setA.random_regions(organism="mm9", 
                                          total_size=100, 
                                          overlap_result=False, 
                                          overlap_input=True)
        result.sort()
        #print("-"*80)
        #print("The result random regions are: ")
        #for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        #print("Overlaps within result: ",result.within_overlap())
        
        self.region_sets([['chr1',0,10000],['chr2',0,20000],['chrX',0,30000]],
                         [])
        result = self.setA.random_regions(organism="mm9", 
                                          total_size=100, 
                                          overlap_result=True, 
                                          overlap_input=True)
        result.sort()
        #print("-"*80)
        #print("The result random regions are: ")
        #for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        #print("Overlaps within result: ",result.within_overlap())
        
        self.region_sets([['chr1',0,1000],['chr2',0,2000],['chrX',0,3000]],
                         [])
        result = self.setA.random_regions(organism="mm9", 
                                          multiply_factor=100, 
                                          overlap_result=False, 
                                          overlap_input=False)
        result.sort()
        #print("-"*80)
        #print("The result random regions are: ")
        #for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        #print("Overlaps within result: ",result.within_overlap())
        
        self.region_sets([['chr1',0,1000],['chr2',0,2000],['chrX',0,3000]],
                         [])
        result = self.setA.random_regions(organism="mm9", 
                                          multiply_factor=100, 
                                          overlap_result=False, 
                                          overlap_input=False,
                                          chrom_M=True)
        result.sort()
Ejemplo n.º 2
0
parser.add_argument('-l','-length', type=int, help='Define the length of each sequence.')
parser.add_argument('-n','-number', type=int, help='Define the number of random regions.')
parser.add_argument('-f','-filter', default=None, help='Given the path to the BED file as the filter.')

args = parser.parse_args()

# Setup the entries
region = GenomicRegion("sample", initial=0, final=args.l)
template = GenomicRegionSet("tamplate")
template.add(region)
    
if not os.path.exists(bed_dir):
    os.makedirs(bed_dir)
    
# Random region
result = template.random_regions(organism= "hg19", total_size=args.n, multiply_factor=0, overlap_result=True, overlap_input=True, chrom_X=False, chrom_M=False, filter_path=args.f)
result.write(os.path.join(bed_dir, "00total.bed"))
chrom = GenomicRegionSet("chrom")
chrom.get_genome_data(organism=args.o, chrom_X=False, chrom_M=False)
            
chrom_list = []
for r in chrom.sequences:
    chrom_list.append(r.chrom)
        
print("Settings:\n\tAllowing overlapping within random regions.")
print("Randomizing method:\n\tChoose chromosome with the same possibility (1/23 for each chromosome)\n" +
      "\tChoose random position with the given length without the regard of previous chosen regions.")
print("The distribution of the results are:")
for c in chrom_list:
    print("\t"+str(len(result.any_chrom(c)))+" random regions on " + c)
Ejemplo n.º 3
0
class TestGenomicRegionSet(unittest.TestCase):
    def region_sets(self, listA, listB):
        """ Setting two GenomicRegionSets as self.setA and self.setB for each case test. """
        self.setA = GenomicRegionSet('for Unit Test')
        for i in range(len(listA)):
            self.setA.add(
                GenomicRegion(chrom=listA[i][0],
                              initial=listA[i][1],
                              final=listA[i][2]))

        self.setB = GenomicRegionSet('for Unit Test')
        for i in range(len(listB)):
            self.setB.add(
                GenomicRegion(chrom=listB[i][0],
                              initial=listB[i][1],
                              final=listB[i][2]))

    def test_extend(self):
        """
        Two empty sets
        A : none 
        R : none
        """
        self.region_sets([], [])
        self.setA.extend(100, 100)
        self.assertEqual(len(self.setA.sequences), 0)
        """
        One region
        A :   -----
        R : ---------
        """
        self.region_sets([['chr1', 5, 10]], [])
        result = self.setA
        result.extend(4, 4)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 14)
        """
        Many region
        A :   -----   ------         -----    -----
        R : --------=---------     ------------------
        """
        self.region_sets([['chr1', 5, 10], ['chr1', 15, 20], ['chr1', 40, 50],
                          ['chr1', 65, 75]], [])
        result = self.setA
        result.extend(5, 5)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 0)
        self.assertEqual(result[0].final, 15)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 25)
        self.assertEqual(result[2].initial, 35)
        self.assertEqual(result[2].final, 55)
        self.assertEqual(result[3].initial, 60)
        self.assertEqual(result[3].final, 80)
        """
        Many region in different chromosome
        A :   -----   ------         -----    -----
        R : none
        """
        self.region_sets([['chr1', 5, 10], ['chr2', 15, 20], ['chr3', 40, 50],
                          ['chr4', 65, 75]], [])
        result = self.setA
        result.extend(5, 5)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 0)
        self.assertEqual(result[0].final, 15)
        self.assertEqual(result[0].chrom, 'chr1')
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 25)
        self.assertEqual(result[1].chrom, 'chr2')
        self.assertEqual(result[2].initial, 35)
        self.assertEqual(result[2].final, 55)
        self.assertEqual(result[2].chrom, 'chr3')
        self.assertEqual(result[3].initial, 60)
        self.assertEqual(result[3].final, 80)
        self.assertEqual(result[3].chrom, 'chr4')
        """
        One region
        A :   -----
        R : ---------
        """
        self.region_sets([['chr1', 100, 200]], [])
        result = self.setA
        result.extend(10, 10, percentage=True)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 90)
        self.assertEqual(result[0].final, 210)

    def test_sort(self):
        self.region_sets([['chr1', 15, 20], ['chr1', 40, 50], ['chr1', 65, 75],
                          ['chr1', 5, 10]], [])
        self.setA.sort()

    def test_intersect(self):
        """
        Two empty sets
        A : none 
        B : none
        R : none
        """
        self.region_sets([], [])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        One empty set
        A :   -----
        B : none
        R : none
        """
        self.region_sets([['chr1', 5, 10]], [])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        A : none
        B :   -----
        R : none
        """
        self.region_sets([], [['chr1', 5, 10]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        No overlapping
        A : ------      ---------               ------- 
        B :        ----          ------  ------   
        R : none
        """
        self.region_sets([['chr1', 1, 5], ['chr1', 11, 20], ['chr1', 33, 38]],
                         [['chr1', 7, 9], ['chr1', 20, 25], ['chr1', 26, 31]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        End-to-end attach
        A : ------      ------
        B :       ------
        R : none
        """
        self.region_sets([['chr1', 1, 5], ['chr1', 11, 20]], [['chr1', 5, 11]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        No length attach
        A : .      .
        B :    .   .
        R : none
        """
        self.region_sets([['chr1', 2, 2], ['chr1', 20, 20]],
                         [['chr1', 5, 5], ['chr1', 20, 20]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Perfect overlapping
        A : ------
        B : ------
        R : ------
        """
        self.region_sets(
            [['chr1', 1, 10], ['chr1', 500, 550], ['chr1', 600, 650],
             ['chr1', 700, 750], ['chr1', 725, 800]],
            [['chr1', 1, 10], ['chr1', 500, 550], ['chr1', 600, 650],
             ['chr1', 700, 750], ['chr1', 725, 800]])
        result = self.setA.intersect(self.setB,
                                     mode=OverlapType.OVERLAP,
                                     rm_duplicates=True)

        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 5)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 5)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        """
        One overlapping region
        A : ------
        B :     --------
        R1:     --       (overlap)
        R2: ------       (original)
        R3:              (comp_incl)
        """

        self.region_sets([['chr1', 1, 10]], [['chr1', 7, 20]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 7)
        self.assertEqual(result[0].final, 10)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Two simple overlapping regions
        A : -------      --------
        B :     -------------
        R1:     ---      ----     (overlap)
        R2: -------      -------- (original)
        R3:                       (comp_incl)
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 26, 35]],
                         [['chr1', 7, 30]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 7)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 26)
        self.assertEqual(result[1].final, 30)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 26)
        self.assertEqual(result[1].final, 35)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Two separately overlapping regions 
        A : -------      --------
        B :     -----        --------
        R1:     ---          ----     (overlap)
        R2: -------      --------     (original)
        R3:                           (comp_incl)
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 26, 35]],
                         [['chr1', 7, 15], ['chr1', 30, 40]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 7)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 30)
        self.assertEqual(result[1].final, 35)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 26)
        self.assertEqual(result[1].final, 35)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Many various overlapping (mixed)
        A :   ------------------            --------   ---------
        B : ----   -------    ------            ----------      
        R1:   --   -------    --                ----   ---       (overlap)
        R2:   ------------------            --------   --------- (original)
        R3:                                                      (comp_incl) 
        """

        self.region_sets([['chr1', 3, 30], ['chr1', 50, 60], ['chr1', 70, 85]],
                         [['chr1', 1, 5], ['chr1', 10, 19], ['chr1', 27, 35],
                          ['chr1', 55, 75]])

        result = self.setA.intersect(self.setB, mode=OverlapType.OVERLAP)
        self.assertEqual(len(result), 5)
        self.assertEqual(result[0].initial, 3)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 27)
        self.assertEqual(result[2].final, 30)
        self.assertEqual(result[3].initial, 55)
        self.assertEqual(result[3].final, 60)
        self.assertEqual(result[4].initial, 70)
        self.assertEqual(result[4].final, 75)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 3)
        self.assertEqual(result[0].final, 30)
        self.assertEqual(result[1].initial, 50)
        self.assertEqual(result[1].final, 60)
        self.assertEqual(result[2].initial, 70)
        self.assertEqual(result[2].final, 85)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Different chromosomes
        A : chr1  -------
        B : chr2  -------
        R : none
        """
        self.region_sets([['chr1', 1, 10]], [['chr2', 1, 10]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 0)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        Completely included overlapping
        A : ---------------------------
        B : ----    ------       -----------
        R1: ----    ------       ------      (overlap)
        R2: ---------------------------      (original)
        R3:                                  (comp_incl)
        """
        self.region_sets([['chr1', 1, 50]],
                         [['chr1', 1, 5], ['chr1', 10, 19], ['chr1', 45, 60]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 45)
        self.assertEqual(result[2].final, 50)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 50)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 0)
        """
        A : ----    ------       -----------
        B : ---------------------------
        R1: ----    ------       ------      (overlap)
        R2: ----    ------       ----------- (original)
        R3: ----    ------                   (comp_incl)
        """

        self.region_sets([['chr1', 1, 5], ['chr1', 10, 19], ['chr1', 45, 60]],
                         [['chr1', 1, 50]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 45)
        self.assertEqual(result[2].final, 50)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        self.assertEqual(result[2].initial, 45)
        self.assertEqual(result[2].final, 60)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 5)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 19)
        """
        A : --------------         -------
                ------
        B :       -----          ----------------
        R1:       -----            -------      (overlap)
                  ----
        R2: --------------         -------      (original)
                ------
        R3:                        -------      (comp_incl)
        """
        self.region_sets([['chr1', 1, 50], ['chr1', 20, 40], ['chr1', 70, 80]],
                         [['chr1', 25, 45], ['chr1', 65, 95]])
        result = self.setA.intersect(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 25)
        self.assertEqual(result[0].final, 45)
        self.assertEqual(result[1].initial, 70)
        self.assertEqual(result[1].final, 80)

        result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[1].initial, 20)
        self.assertEqual(result[1].final, 40)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 50)
        self.assertEqual(result[2].initial, 70)
        self.assertEqual(result[2].final, 80)

        result = self.setA.intersect(self.setB, mode=OverlapType.COMP_INCL)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 70)
        self.assertEqual(result[0].final, 80)

    def test_closest(self):
        """
        Two empty sets
        A : none 
        B : none
        R : none
        """
        self.region_sets([], [])
        result = self.setA.closest(self.setB)
        self.assertEqual(len(result), 0)
        # """
        # One empty set
        # A :   -----
        # B : none
        # R : none
        # """
        # self.region_sets([['chr1',5,10]],
        #                  [])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 0)
        # """
        # A : none
        # B :   -----
        # R : none
        # """
        # self.region_sets([],
        #                  [['chr1',5,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 0)
        # """
        # Overlapping within set
        # A : -----====-----
        # B :      ----
        # R :      ----
        # """
        # self.region_sets([['chr1',1,10],['chr1',6,15]],
        #                  [['chr1',6,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 2)
        # """
        # A :      ----
        # B : -----====-----
        # R : -----====-----
        # """
        # self.region_sets([['chr1',6,10]],
        #                  [['chr1',1,10],['chr1',6,15]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 1)
        # """
        # No overlapping
        # A : ------      ---------               -------
        # B :        ----          ------  ------
        # R :                      ------
        # """
        # self.region_sets([['chr1',1,5],['chr1',11,20],['chr1',33,38]],
        #                  [['chr1',7,9],['chr1',20,25],['chr1',26,31]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 3)
        # # self.assertEqual(result[0].initial, 20)
        # # self.assertEqual(result[0].final, 25)
        # """
        # End-to-end attach
        # A : ------      ------
        # B :       ------
        # R :       ------
        # """
        # self.region_sets([['chr1',1,5],['chr1',11,20]],
        #                  [['chr1',5,11]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 2)
        # # self.assertEqual(result[0].initial, 5)
        # # self.assertEqual(result[0].final, 11)
        # """
        # Perfect overlapping
        # A : ------
        # B : ------
        # R : ------
        # """
        # self.region_sets([['chr1',1,10]],
        #                  [['chr1',1,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 1)
        # self.assertEqual(result[0].initial, 1)
        # self.assertEqual(result[0].final, 10)
        # """
        # One overlapping region
        # A : ------
        # B :     --------
        # R :     --------
        # """
        # self.region_sets([['chr1',1,10]],
        #                  [['chr1',7,20]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(result[0].initial, 7)
        # self.assertEqual(result[0].final, 20)
        # """
        # Two simple overlapping regions
        # A : -------      --------
        # B :     -------------
        # R :     -------------
        # """
        # self.region_sets([['chr1',1,10],['chr1',26,35]],
        #                  [['chr1',7,30]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(result[0].initial, 7)
        # self.assertEqual(result[0].final, 30)
        # """
        # Two separately overlapping regions
        # A : -------      --------
        # B :     -----        --------
        # R : none
        # """
        # self.region_sets([['chr1',1,10],['chr1',26,35]],
        #                  [['chr1',7,15],['chr1',30,40]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 2)
        # """
        # Many various overlapping (mixed)
        # A :   ------------------            --------   ---------
        # B : ----   -------    ------            ----------
        # R : none
        # """
        # self.region_sets([['chr1',3,30],['chr1',50,60],['chr1',70,85]],
        #                  [['chr1',1,5],['chr1',10,19],['chr1',27,35],['chr1',55,75]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 4)
        # """
        # Different chromosomes
        # A : chr1  -------
        # B : chr2  -------
        # R : chr2  -------
        #
        # """
        # self.region_sets([['chr1',1,10]],
        #                  [['chr2',1,10]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 0)
        # """
        # Completely included overlapping
        # A : ---------------------------
        # B : ----    ------       -----------
        # R : ----    ------       -----------
        # """
        # self.region_sets([['chr1',1,50]],
        #                  [['chr1',1,5],['chr1',10,19],['chr1',45,60]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 3)
        # """
        # A : ----    ------       -----------
        # B : ---------------------------
        # R : none
        # """
        # self.region_sets([['chr1',1,5],['chr1',10,19],['chr1',45,60]],
        #                  [['chr1',1,50]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(result, False)
        # """
        # A : ----         ------                  ---
        # B :        ---              -----
        # R :        ---
        # """
        # self.region_sets([['chr1',1,5],['chr1',27,45],['chr1',85,95]],
        #                  [['chr1',15,20],['chr1',55,65]])
        # result = self.setA.closest(self.setB)
        # self.assertEqual(len(result), 1)
        # self.assertEqual(result[0].initial, 15)
        # self.assertEqual(result[0].final, 20)

    def test_remove_duplicates(self):
        """
        A : ===== -----
        R : ----- -----
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 1, 10], ['chr1', 15, 25]],
                         [])
        self.setA.remove_duplicates()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        """
        A : =====--- -----
        R : =====--- -----
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 1, 15], ['chr1', 20, 25]],
                         [])
        self.setA.remove_duplicates()
        result = self.setA
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 1)
        self.assertEqual(result[1].final, 15)
        self.assertEqual(result[2].initial, 20)
        self.assertEqual(result[2].final, 25)
        """
        A : ===== ----- ------  ====
        R : ----- ----- ------  ----
        """
        self.region_sets(
            [['chr1', 1, 10], ['chr1', 1, 10], ['chr1', 15, 25],
             ['chr1', 30, 35], ['chr1', 40, 45], ['chr1', 40, 45]], [])
        self.setA.remove_duplicates()
        result = self.setA
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        self.assertEqual(result[2].initial, 30)
        self.assertEqual(result[2].final, 35)
        self.assertEqual(result[3].initial, 40)
        self.assertEqual(result[3].final, 45)

    def test_window(self):
        """
        A :             -------
        B : ------[ 99 ]       [   199   ]---
        window = 100
        R :       -                           only one base overlaps with extending A
        """
        self.region_sets([['chr1', 200, 300]],
                         [['chr1', 1, 101], ['chr1', 499, 550]])
        result = self.setA.window(self.setB, adding_length=100)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 100)
        self.assertEqual(result[0].final, 101)
        """
        A :             -------
        B : ------[ 99 ]       [   199   ]---
        window = 200
        R : ------                        -   
        left-hand side is covered, and the right-hand side is only one base overlapped
        """
        self.region_sets([['chr1', 200, 300]],
                         [['chr1', 1, 101], ['chr1', 499, 550]])
        result = self.setA.window(self.setB, adding_length=200)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial,
                         1)  # GenomicRegion.extend will choose 1 rather than 0
        self.assertEqual(result[0].final, 101)
        self.assertEqual(result[1].initial, 499)
        self.assertEqual(result[1].final, 500)
        """
        A :                         ----    ----
        B :             --------                    ----
        window = 1000 (default)
        R :                 ----                    ----
        """
        self.region_sets([['chr1', 3000, 3500], ['chr1', 4000, 4500]],
                         [['chr1', 1500, 2500], ['chr1', 5000, 5500]])
        result = self.setA.window(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 2000)
        self.assertEqual(result[0].final, 2500)
        self.assertEqual(result[1].initial, 5000)
        self.assertEqual(result[1].final, 5500)
        """
        A :                         ----    ----
        B :             --------                    ----
        window = 2000
        R :             --------                    ----
                            ----                    ----
        window = 100
        R : none
        """
        self.region_sets([['chr1', 3000, 3500], ['chr1', 4000, 4500]],
                         [['chr1', 1500, 2500], ['chr1', 5000, 5500]])
        result = self.setA.window(self.setB, adding_length=2000)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1500)
        self.assertEqual(result[0].final, 2500)
        self.assertEqual(result[1].initial, 5000)
        self.assertEqual(result[1].final, 5500)
        result = self.setA.window(self.setB, adding_length=100)
        self.assertEqual(len(result), 0)

    def test_subtract(self):
        """
        A : none
        B :    ------
        R : none
        """
        self.region_sets([], [['chr1', 6, 15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 0)
        """
        A :    ------
        B : none
        R :    ------
        """
        self.region_sets([['chr1', 6, 15]], [])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 6)
        self.assertEqual(result[0].final, 15)
        """
        A : ------
        B :    ------
        R : ---
        """
        self.region_sets([['chr1', 1, 10]], [['chr1', 6, 15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 6)
        """
        A :    ------
        B : ------
        R :       ---
        """
        self.region_sets([['chr1', 6, 15]], [['chr1', 1, 10]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 10)
        self.assertEqual(result[0].final, 15)
        """
        A :    ---
        B : ---------
        R : none
        """
        self.region_sets([['chr1', 6, 10]], [['chr1', 1, 15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 0)
        """
        A : ---------
        B :    ---
        R : ---   ---
        """
        self.region_sets([['chr1', 1, 15]], [['chr1', 6, 10]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 6)
        self.assertEqual(result[1].initial, 10)
        self.assertEqual(result[1].final, 15)
        """
        A :    ------
        B :    ------
        R : none
        """
        self.region_sets([['chr1', 6, 15]], [['chr1', 6, 15]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 0)
        """
        A :   ----------              ------
        B :          ----------                    ----
        R :   -------                 ------
        """
        self.region_sets([['chr1', 5, 30], ['chr1', 70, 85]],
                         [['chr1', 20, 50], ['chr1', 100, 110]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 5)
        self.assertEqual(result[0].final, 20)
        self.assertEqual(result[1].initial, 70)
        self.assertEqual(result[1].final, 85)
        """
        A :        ------   -----
        B :    ------
        R :          ----   -----
        """
        self.region_sets([['chr1', 20, 30], ['chr1', 35, 55]],
                         [['chr1', 10, 23], ['chr1', 100, 110]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 23)
        self.assertEqual(result[0].final, 30)
        self.assertEqual(result[1].initial, 35)
        self.assertEqual(result[1].final, 55)
        """
        A :   ch1     ---------------------
              ch2     -------------------------
        B :   ch1             ------
              ch2                        ------
        R :   ch1     --------      -------
              ch2     -------------------
        """
        self.region_sets([['chr1', 0, 30000], ['chr2', 0, 35000]],
                         [['chr1', 20000, 23000], ['chr2', 31000, 35000]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 3)
        self.assertEqual(result[0].initial, 0)
        self.assertEqual(result[0].final, 20000)
        self.assertEqual(result[1].initial, 23000)
        self.assertEqual(result[1].final, 30000)
        self.assertEqual(result[2].initial, 0)
        self.assertEqual(result[2].final, 31000)
        """
        A :   -----------------------------------------------------------
        B :    ---    ---------         ----           ----
        R :   -   ----         ---------    -----------    --------------
        """
        self.region_sets([['chr1', 5, 1000]],
                         [['chr1', 10, 15], ['chr1', 30, 70],
                          ['chr1', 120, 140], ['chr1', 200, 240]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 5)
        """
        A :   -----------------------              ------
                   -----     -----  -----------
        B :    ---    ---------         ----           ----
        R :   -   ----         ------              ----
                   ---         ---  ----    ---
        """
        self.region_sets([['chr1', 5, 100], ['chr1', 20, 40], ['chr1', 60, 80],
                          ['chr1', 95, 150], ['chr1', 180, 220]],
                         [['chr1', 10, 15], ['chr1', 30, 70],
                          ['chr1', 120, 140], ['chr1', 200, 240]])
        result = self.setA.subtract(self.setB)
        # print(result.sequences)
        self.assertEqual(len(result), 8)
        self.assertEqual(result[0].initial, 5)
        """
        A :   -----------------------------------------------------------
        B :    ---    ---------         ----           ----
        R :   -   ----         ---------    -----------    --------------
        """
        self.region_sets(
            [['chr1', 5, 1000], ['chr2', 5, 1000], ['chr4', 5, 1000]],
            [['chr1', 10, 15], ['chr1', 30, 70], ['chr1', 120, 140],
             ['chr1', 200, 240], ['chr2', 10, 15], ['chr2', 30, 70],
             ['chr2', 120, 140], ['chr2', 200, 240], ['chr4', 10, 15],
             ['chr4', 30, 70], ['chr4', 120, 140], ['chr4', 200, 240]])
        result = self.setA.subtract(self.setB)
        self.assertEqual(len(result), 15)

    def test_merge(self):
        """
        A : none
        R : none
        """
        self.region_sets([], [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 0)
        """
        A : -----  -----
        R : -----  -----
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 15, 25]], [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        """
        A1: ------------   ----
        A2:    -----
        R : ------------   ----
        """
        self.region_sets([['chr1', 1, 30], ['chr1', 11, 20], ['chr1', 40, 50]],
                         [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 30)
        self.assertEqual(result[1].initial, 40)
        self.assertEqual(result[1].final, 50)
        """
        A1: --------       ----
        A2:    ---------
        R : ------------   ----
        """
        self.region_sets([['chr1', 1, 30], ['chr1', 20, 40], ['chr1', 50, 60]],
                         [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 40)
        self.assertEqual(result[1].initial, 50)
        self.assertEqual(result[1].final, 60)
        """
        A : =======
        R : -------
        """
        self.region_sets([['chr1', 1, 30], ['chr1', 1, 30]], [])
        self.setA.merge()
        result = self.setA
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 30)

    def test_cluster(self):
        """
        Empty sets
        A : none 
        R : none
        """
        self.region_sets([], [])
        result = self.setA.cluster(10)
        self.assertEqual(len(result), 0)
        """
        A :  ------- 
        R :  -------
        """
        self.region_sets([['chr1', 1, 10]], [])
        result = self.setA.cluster(10)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        """
        A :  -----
                  ------
        R :  -----------
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 10, 20]], [])
        result = self.setA.cluster(10)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 20)
        """
        A :  -----  -----
        R1:  -----  -----
        R2:  ------------
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 15, 25]], [])
        result = self.setA.cluster(1)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        result = self.setA.cluster(5)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 10)
        self.assertEqual(result[1].initial, 15)
        self.assertEqual(result[1].final, 25)
        result = self.setA.cluster(6)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0].initial, 1)
        self.assertEqual(result[0].final, 25)
        """
        A :  ---- ----  ----   ----    ----
        R1:  ---------  ----   ----    ----
        R2:  ---------------   ----    ----
        R3:  ----------------------    ----
        R4:  ------------------------------
        R5:  ------------------------------
        """
        self.region_sets([['chr1', 1, 10], ['chr1', 15, 25], ['chr1', 35, 45],
                          ['chr1', 60, 70], ['chr1', 90, 100]], [])
        result = self.setA.cluster(6)
        self.assertEqual(len(result), 4)
        result = self.setA.cluster(11)
        self.assertEqual(len(result), 3)
        result = self.setA.cluster(16)
        self.assertEqual(len(result), 2)
        result = self.setA.cluster(21)
        self.assertEqual(len(result), 1)
        result = self.setA.cluster(26)
        self.assertEqual(len(result), 1)

    def test_flank(self):
        """
        A :        -----
        R1:     ---     ---
        """
        self.region_sets([['chr1', 60, 75]], [])
        result = self.setA.flank(10)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].initial, 50)
        self.assertEqual(result[0].final, 60)
        self.assertEqual(result[1].initial, 75)
        self.assertEqual(result[1].final, 85)
        """
        A :        -----     ----
        R1:   -----     =====    ----
        """
        self.region_sets([['chr1', 60, 75], ['chr1', 90, 100]], [])
        result = self.setA.flank(15)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0].initial, 45)
        self.assertEqual(result[0].final, 60)
        self.assertEqual(result[1].initial, 75)
        self.assertEqual(result[1].final, 90)
        self.assertEqual(result[2].initial, 75)
        self.assertEqual(result[2].final, 90)
        self.assertEqual(result[3].initial, 100)
        self.assertEqual(result[3].final, 115)

    def test_jaccard(self):
        """
        self           --8--      ---10---      -4-
        y         ---10---             ---10---
        intersect      -5-             -4-    
        similarity:   ( 5 + 4 )/[(8 + 10 + 4) + (10 +10) - (5 + 4 )]
                      = 9/33
        """
        self.region_sets(
            [['chr1', 50, 58], ['chr1', 70, 80], ['chr1', 90, 94]],
            [['chr1', 45, 55], ['chr1', 76, 86]])
        result = self.setA.jaccard(self.setB)
        self.assertEqual(result, 9 / 33)

    def test_get_genome_data(self):
        """hg19"""
        result = GenomicRegionSet("hg19")
        result.get_genome_data(organism="hg19")
        self.assertEqual(len(result), 23)
        """hg19, with Mitochondria chromosome"""
        result = GenomicRegionSet("hg19")
        result.get_genome_data(organism="hg19", chrom_M=True)
        self.assertEqual(len(result), 24)

    def test_random_regions(self):

        self.region_sets(
            [['chr1', 0, 10000], ['chr2', 0, 20000], ['chrX', 0, 30000]], [])
        result = self.setA.random_regions(organism="mm9",
                                          total_size=100,
                                          overlap_result=False,
                                          overlap_input=False)
        result.sort()
        # print("-"*80)
        # print("The result random regions are: ")
        # for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        # print("Overlaps within result: ",result.within_overlap())

        self.region_sets(
            [['chr1', 0, 10000], ['chr2', 0, 20000], ['chrX', 0, 30000]], [])
        result = self.setA.random_regions(organism="mm9",
                                          total_size=100,
                                          overlap_result=True,
                                          overlap_input=False)
        result.sort()
        # print("-"*80)
        # print("The result random regions are: ")
        # for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        # print("Overlaps within result: ",result.within_overlap())

        self.region_sets(
            [['chr1', 0, 10000], ['chr2', 0, 20000], ['chrX', 0, 30000]], [])
        result = self.setA.random_regions(organism="mm9",
                                          total_size=100,
                                          overlap_result=False,
                                          overlap_input=True)
        result.sort()
        # print("-"*80)
        # print("The result random regions are: ")
        # for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        # print("Overlaps within result: ",result.within_overlap())

        self.region_sets(
            [['chr1', 0, 10000], ['chr2', 0, 20000], ['chrX', 0, 30000]], [])
        result = self.setA.random_regions(organism="mm9",
                                          total_size=100,
                                          overlap_result=True,
                                          overlap_input=True)
        result.sort()
        # print("-"*80)
        # print("The result random regions are: ")
        # for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        # print("Overlaps within result: ",result.within_overlap())

        self.region_sets(
            [['chr1', 0, 1000], ['chr2', 0, 2000], ['chrX', 0, 3000]], [])
        result = self.setA.random_regions(organism="mm9",
                                          multiply_factor=100,
                                          overlap_result=False,
                                          overlap_input=False)
        result.sort()
        # print("-"*80)
        # print("The result random regions are: ")
        # for s in result.sequences:
        #    print("\t%s\t%10d\t%10d%10d" % (s.chrom,s.initial,s.final,s.__len__()))
        # print("Overlaps within result: ",result.within_overlap())

        self.region_sets(
            [['chr1', 0, 1000], ['chr2', 0, 2000], ['chrX', 0, 3000]], [])
        result = self.setA.random_regions(organism="mm9",
                                          multiply_factor=100,
                                          overlap_result=False,
                                          overlap_input=False,
                                          chrom_M=True)
        result.sort()