Ejemplo n.º 1
0
    def test_mutability_1(self):
        """RangeSetND.update() with a RangeSetND must not share input RangeSets."""
        first_a = RangeSet("2-5")
        first_b = RangeSet("0-1")
        # copy_rangeset is left at its default here
        nd_main = RangeSetND([[first_a, first_b]])
        self.assertEqual(str(nd_main), "2-5; 0-1\n")

        second_a = RangeSet("6-7")
        second_b = RangeSet("2-3")
        nd_other = RangeSetND([[second_a, second_b]])
        nd_main.update(nd_other)
        self.assertEqual(str(nd_main), "2-5; 0-1\n6-7; 2-3\n")

        # the source RangeSets must be left untouched by the update
        for source, expected in ((first_a, "2-5"), (first_b, "0-1"),
                                 (second_a, "6-7"), (second_b, "2-3")):
            self.assertEqual(str(source), expected)

        # conversely, changing the sources must not leak into nd_main
        first_b.add(2)
        self.assertEqual(str(first_b), "0-2")
        second_b.add(4)
        self.assertEqual(str(second_b), "2-4")
        self.assertEqual(str(nd_main), "2-5; 0-1\n6-7; 2-3\n")

        # nd_other only sees the new element once it is updated explicitly
        self.assertEqual(str(nd_other), "6-7; 2-3\n")
        nd_other.update([[second_a, second_b]])
        self.assertEqual(str(nd_other), "6-7; 2-4\n")

        # ... and nd_main is still unaffected by that
        self.assertEqual(str(nd_main), "2-5; 0-1\n6-7; 2-3\n")
Ejemplo n.º 2
0
 def _prepare_token_array(self):
     """Fill ``self.token_array`` and ``self.witness_ranges`` from the witnesses.

     For every witness, in order:

     * a RangeSet starting at ``self.counter`` and spanning the witness's
       token count is stored in ``self.witness_ranges`` under its sigil;
     * ``self.counter`` advances by the token count plus one (the marker);
     * each token gets ``_sigil`` and ``_token_array_position`` entries in
       its ``token_data``;
     * the tokens and then a ``$<idx>`` marker token are appended to
       ``self.token_array``.

     The marker appended after the last witness is removed again. When
     there are no witnesses nothing is appended and nothing is removed
     (previously the unconditional ``pop()`` raised IndexError on an empty
     token array, or dropped an unrelated token from a non-empty one).
     """
     # TODO: the lazy init should move to somewhere else
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     token_array_position = 0
     marker_appended = False
     for idx, witness in enumerate(self.witnesses):
         witness_range = RangeSet()
         witness_range.add_range(self.counter,
                                 self.counter + len(witness.tokens()))
         # the extra one is for the marker token
         self.counter += len(witness.tokens()) + 1
         self.witness_ranges[witness.sigil] = witness_range
         # remember: witness.tokens() is fetched twice below
         sigil = witness.sigil
         for token in witness.tokens():
             token.token_data['_sigil'] = sigil
             token.token_data[
                 '_token_array_position'] = token_array_position
             token_array_position += 1
         self.token_array.extend(witness.tokens())
         # add marker token
         self.token_array.append(
             Token({
                 "n": '$' + str(idx),
                 '_sigil': sigil
             }))
         # the marker occupies a position in the token array as well
         token_array_position += 1
         marker_appended = True
     if marker_appended:
         self.token_array.pop()  # remove last marker
Ejemplo n.º 3
0
 def test_iand(self):
     """test RangeSet in-place intersection (&=)"""
     target = RangeSet("1,3-9,14-21,30-39,42")
     operand = RangeSet("2-5,10-32,35,40-41")
     target &= operand
     # only the indices present in both sets remain
     self.assertEqual(len(target), 15)
     self.assertEqual(str(target), "3-5,14-21,30-32,35")
Ejemplo n.º 4
0
 def test_ior(self):
     """test RangeSet in-place union (|=)"""
     target = RangeSet("1,3-9,14-21,30-39,42")
     operand = RangeSet("2-5,10-32,35,40-41")
     target |= operand
     # both sets together cover 1-42 without a gap
     self.assertEqual(len(target), 42)
     self.assertEqual(str(target), "1-42")
Ejemplo n.º 5
0
 def test_isub(self):
     """test RangeSet in-place difference (-=)"""
     target = RangeSet("1,3-9,14-21,30-39,42")
     operand = RangeSet("2-5,10-32,35,40-41")
     target -= operand
     # every index of the operand has been removed from the target
     self.assertEqual(len(target), 12)
     self.assertEqual(str(target), "1,6-9,33-34,36-39,42")
Ejemplo n.º 6
0
 def test_ixor(self):
     """test RangeSet in-place symmetric difference (^=)"""
     target = RangeSet("1,3-9,14-21,30-39,42")
     operand = RangeSet("2-5,10-32,35,40-41")
     target ^= operand
     # only indices found in exactly one of the two sets remain
     self.assertEqual(len(target), 27)
     self.assertEqual(str(target), "1-2,6-13,22-29,33-34,36-42")
Ejemplo n.º 7
0
 def _extract_conf(self, cfg):
     """Extract cluster nodes configuration.

     Builds a dict from the mapping `cfg`:

     * the ``'default'`` entry of `cfg` is merged into ``conf['default']``;
     * an entry whose value is a dict is stored once per index of the
       RangeSet described by its key (an int key means a single index);
       keys that fail to parse as a RangeSet are logged and skipped;
     * any other entry is stored as a single value in ``conf['default']``.

     The result is then passed through ``clustdock.format_dict`` together
     with this object's attributes. If that raises KeyError the error is
     logged and the unformatted dict is returned.
     """
     conf = {"default": {}}
     # items() rather than the Python 2-only iteritems(), so this runs on
     # both Python 2 and Python 3
     for key, val in cfg.items():
         if key == 'default':
             conf['default'].update(val)
         elif isinstance(val, dict):
             if isinstance(key, int):
                 rset = RangeSet.fromone(key)
             else:
                 try:
                     rset = RangeSet(key)
                 except RangeSetParseError as err:
                     _LOGGER.warning(
                         "Error in configuration file:"
                         " %s. Ignoring this part", err)
                     continue
             # every index of the range refers to the same dict object
             for idx in rset:
                 conf[idx] = val
         else:
             conf['default'][key] = val
     try:
         conf = clustdock.format_dict(conf, **self.__dict__)
     except KeyError:
         # best effort: keep the unformatted configuration
         _LOGGER.exception("Key not found:")
     return conf
Ejemplo n.º 8
0
 def testClear(self):
     """test that RangeSet.clear() empties the set"""
     pattern = "1-100,102,105-242,800"
     rset = RangeSet(pattern)
     self.assertEqual(len(rset), 240)
     self.assertEqual(str(rset), pattern)
     rset.clear()
     # after clear() the set has no element and an empty string form
     self.assertEqual(len(rset), 0)
     self.assertEqual(str(rset), "")
 def test_witness_ranges_hermans_case(self):
     """Check the token ranges the collation reports for each witness."""
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(RangeSet("0-14"),
                      collation.get_range_for_witness("W1"))
     self.assertEqual(RangeSet("17-29"),
                      collation.get_range_for_witness("W2"))
Ejemplo n.º 10
0
 def testClear(self):
     """test RangeSet.clear() on a populated set"""
     initial = "1-100,102,105-242,800"
     ranges = RangeSet(initial)
     # sanity check of the starting content
     self.assertEqual(len(ranges), 240)
     self.assertEqual(str(ranges), initial)
     ranges.clear()
     self.assertEqual(len(ranges), 0)
     self.assertEqual(str(ranges), "")
Ejemplo n.º 11
0
 def test_vectors(self):
     """test RangeSetND.vectors() and the resulting str()/len()"""
     nd_set = RangeSetND([["0-10", "1-2"], ["5-60", "2"]])
     # vectors() should perform automatic folding
     expected_vectors = [[RangeSet("0-60"), RangeSet("2")],
                         [RangeSet("0-10"), RangeSet("1")]]
     folded = list(nd_set.vectors())
     self.assertEqual(expected_vectors, folded)
     self.assertEqual(str(nd_set), "0-60; 2\n0-10; 1\n")
     self.assertEqual(len(nd_set), 72)
Ejemplo n.º 12
0
 def test_non_overlapping_blocks_Hermans(self):
     """Two witnesses: both shared stretches must be reported as blocks."""
     collation = Collation()
     for sigil, text in (("W1", "a b c d F g h i ! K ! q r s t"),
                         ("W2", "a b c d F g h i ! q r s t")):
         collation.add_plain_witness(sigil, text)
     scorer = Scorer(collation)
     found = scorer._get_non_overlapping_repeating_blocks()
     expected_ranges = (
         "0-8, 17-25",    # a b c d F g h i !
         "11-14, 26-29",  # q r s t
     )
     for ranges in expected_ranges:
         self.assertIn(Block(RangeSet(ranges)), found)
Ejemplo n.º 13
0
 def test_non_overlapping_blocks_Hermans(self):
     """Two witnesses scored through a TokenIndex: both shared blocks found."""
     collation = Collation()
     for sigil, text in (("W1", "a b c d F g h i ! K ! q r s t"),
                         ("W2", "a b c d F g h i ! q r s t")):
         collation.add_plain_witness(sigil, text)
     token_index = TokenIndex.create_token_index(collation)
     found = Scorer(token_index)._get_non_overlapping_repeating_blocks()
     expected_ranges = (
         "0-8, 16-24",    # a b c d F g h i !
         "11-14, 25-28",  # q r s t
     )
     for ranges in expected_ranges:
         self.assertIn(Block(RangeSet(ranges)), found)
Ejemplo n.º 14
0
 def get_compute_rangeset(self):
     """Return a RangeSet of the ids of all children matching the compute regex.

     Each child from ``self.get_children()`` is matched against
     ``self._comp_regex``; the ``id`` named group of every match is added
     to the result. Children that do not match are ignored.
     """
     compute_ids = RangeSet()
     for name in self.get_children():
         found = re.match(self._comp_regex, name)
         if not found:
             continue
         groups = found.groupdict()
         compute_ids.union_update(RangeSet(str(groups['id'])))
     return compute_ids
Ejemplo n.º 15
0
 def test_blocks_failing_transposition_use_case_old_algorithm(self):
     """Transposed phrases: the scorer must return exactly three blocks, in order."""
     collation = Collation()
     for sigil, text in (("W1", "the cat and the dog"),
                         ("W2", "the dog and the cat")):
         collation.add_plain_witness(sigil, text)
     scorer = Scorer(collation)
     found = scorer._get_non_overlapping_repeating_blocks()
     # expected blocks, in the order the scorer has to return them
     expected = [Block(RangeSet(ranges))
                 for ranges in ("0-1, 9-10", "3-4, 6-7", "2, 8")]
     self.assertEqual(expected, found)
 def get_compute_rangeset(self):
     """Build the RangeSet of compute ids found among the children.

     A child contributes its ``id`` named group when it matches
     ``self._comp_regex``; non-matching children are skipped.
     """
     result = RangeSet()
     candidates = (re.match(self._comp_regex, child)
                   for child in self.get_children())
     for hit in candidates:
         if hit:
             compute_id = hit.groupdict()["id"]
             result.union_update(RangeSet(str(compute_id)))
     return result
Ejemplo n.º 17
0
 def test_2(self):
     """Three witnesses with a repeated phrase: check the expected blocks."""
     collation = Collation()
     witnesses = (
         ("W1", "in the in the bleach"),
         ("W2", "in the in the bleach in the"),
         ("W3", "in the in the bleach in the"),
     )
     for sigil, text in witnesses:
         collation.add_plain_witness(sigil, text)
     token_index = TokenIndex.create_token_index(collation)
     found = Scorer(token_index)._get_non_overlapping_repeating_blocks()
     expected_ranges = (
         "0-4, 6-10, 14-18",  # in the in the bleach
         "11-12, 19-20",      # in the
     )
     for ranges in expected_ranges:
         self.assertIn(Block(RangeSet(ranges)), found)
Ejemplo n.º 18
0
 def test_witness_ranges_hermans_case(self):
     """Check the token ranges a prepared TokenIndex reports per witness."""
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
     token_index = TokenIndex(collation.witnesses)
     token_index.prepare()
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(RangeSet("0-14"),
                      token_index.get_range_for_witness("W1"))
     self.assertEqual(RangeSet("16-28"),
                      token_index.get_range_for_witness("W2"))
Ejemplo n.º 19
0
 def test_pickle_current(self):
     """test a RangeSet pickle round trip (current version)"""
     serialized = pickle.dumps(RangeSet("1-100"))
     self.assertNotEqual(serialized, None)
     restored = pickle.loads(serialized)
     self.assertEqual(restored, RangeSet("1-100"))
     self.assertEqual(str(restored), "1-100")
     # indexing must still work on the unpickled object
     for index, expected in ((0, 1), (1, 2), (-1, 100)):
         self.assertEqual(restored[index], expected)
Ejemplo n.º 20
0
 def calculate_non_overlapping_range_with(self, occupied):
     """Return the part of this block's range that does not hit `occupied`.

     The potential range is built from every occurrence returned by
     ``self.block_occurrences()``, each ``self.minimum_block_length`` long.

     Returns:
         * the complete potential range when it does not intersect
           `occupied`;
         * ``None`` when nothing is left after removing the overlap;
         * otherwise a RangeSet holding, for each contiguous potential
           stretch, the part before the first occupied stretch that starts
           at or after it.

     Raises:
         PartialOverlapException: if the first contiguous stretch of the
             result is longer than ``self.minimum_block_length``.
     """
     # convert block occurrences into ranges
     potential_block_range = RangeSet()
     for occurrence in self.block_occurrences():
         potential_block_range.add_range(
             occurrence, occurrence + self.minimum_block_length)
     # check the intersection with the already occupied ranges
     block_intersection = potential_block_range.intersection(occupied)
     if not block_intersection:
         # no overlap, return complete block_range
         return potential_block_range
     # There is overlap with occupied range
     # we need to deal with it
     real_block_range = RangeSet()
     for candidate in potential_block_range.contiguous():
         start = candidate[0]
         # find-first over a generator: stops at the first occupied
         # stretch starting at or after this candidate
         blocker = next((hit for hit in block_intersection.contiguous()
                         if hit[0] >= start), None)
         if blocker is None:
             continue
         stop = blocker[0]
         if start != stop:
             real_block_range.add_range(start, stop)
     if not real_block_range:
         # There is complete overlap, so return None
         return None
     # Assert: check that the first slice is not larger than potential
     # block length!
     # built-in next() works on Python 2.6+ and Python 3, unlike .next()
     first_range = next(real_block_range.contiguous())
     if first_range[-1] - first_range[0] + 1 > self.minimum_block_length:
         raise PartialOverlapException()
     return real_block_range
Ejemplo n.º 21
0
 def testFromListConstructor(self):
     """test the RangeSet.fromlist() constructor with several item types"""
     def check(items):
         # every input below describes the same six indices
         built = RangeSet.fromlist(items)
         self.assertEqual(str(built), "1,3,5-8")
         self.assertEqual(len(built), 6)

     # list of range strings
     check(["3", "5-8", "1"])
     # list of RangeSet objects
     check([RangeSet("3"), RangeSet("5-8"), RangeSet("1")])
     # list holding a plain set of integers
     check([set([3, 5, 6, 7, 8, 1])])
Ejemplo n.º 22
0
 def testDiscard(self):
     """test RangeSet.discard() on present, absent and non-integer elements"""
     rset = RangeSet("1-100,102,105-242,800")
     self.assertEqual(len(rset), 240)
     # discarding a present element shrinks the set by one
     rset.discard(100)
     self.assertEqual(len(rset), 239)
     self.assertEqual(str(rset), "1-99,102,105-242,800")
     # discarding an absent element should not raise KeyError
     rset.discard(101)
     # test remove integer-castable type (convenience)
     rset.remove("106")
     # discard is also called with a non integer-castable value
     rset.discard("foo")
Ejemplo n.º 23
0
 def test_blocks_Hermans_case_three_witnesses(self):
     """Three witnesses scored through a TokenIndex: check the expected blocks."""
     collation = Collation()
     witnesses = (
         ("W1", "a b c d F g h i ! K ! q r s t"),
         ("W2", "a b c d F g h i ! q r s t"),
         ("W3", "a b c d E g h i ! q r s t"),
     )
     for sigil, text in witnesses:
         collation.add_plain_witness(sigil, text)
     token_index = TokenIndex.create_token_index(collation)
     found = Scorer(token_index)._get_non_overlapping_repeating_blocks()
     expected_ranges = (
         "0-3, 16-19, 30-33",    # a b c d
         "5-7, 21-23, 35-37",    # g h i
         "10-14, 24-28, 38-42",  # ! q r s t
         "4, 20",                # F
     )
     for ranges in expected_ranges:
         self.assertIn(Block(RangeSet(ranges)), found)
Ejemplo n.º 24
0
 def test_blocks_Hermans_case_three_witnesses(self):
     """Three witnesses scored from the collation: check the expected blocks."""
     collation = Collation()
     witnesses = (
         ("W1", "a b c d F g h i ! K ! q r s t"),
         ("W2", "a b c d F g h i ! q r s t"),
         ("W3", "a b c d E g h i ! q r s t"),
     )
     for sigil, text in witnesses:
         collation.add_plain_witness(sigil, text)
     found = Scorer(collation)._get_non_overlapping_repeating_blocks()
     expected_ranges = (
         "0-3, 17-20, 32-35",    # a b c d
         "5-7, 22-24, 37-39",    # g h i
         "10-14, 25-29, 40-44",  # ! q r s t
         "4, 21",                # F
     )
     for ranges in expected_ranges:
         self.assertIn(Block(RangeSet(ranges)), found)
Ejemplo n.º 25
0
 def testRemove(self):
     """test RangeSet.remove() including its error cases"""
     rset = RangeSet("1-100,102,105-242,800")
     self.assertEqual(len(rset), 240)
     rset.remove(100)
     self.assertEqual(len(rset), 239)
     self.assertEqual(str(rset), "1-99,102,105-242,800")
     # removing an absent element raises KeyError
     with self.assertRaises(KeyError):
         rset.remove(101)
     # test remove integer-castable type (convenience)
     rset.remove("106")
     # non integer castable cases raise ValueError (documented since 1.6)
     with self.assertRaises(ValueError):
         rset.remove("foo")
Ejemplo n.º 26
0
 def test_simple(self):
     """Run the constructor cases through _testRS, then check copy_rangeset=False."""
     # Test constructors
     # presumably (constructor argument, expected str(), expected len()) --
     # confirm against _testRS
     constructor_cases = (
         (None, "", 0),
         ([["0-10"], ["40-60"]], "0-10,40-60\n", 32),
         ([["0-2", "1-2"], ["10", "3-5"]], "0-2; 1-2\n10; 3-5\n", 9),
         ([[0, 1], [0, 2], [2, 2], [2, 1], [1, 1], [1, 2],
           [10, 4], [10, 5], [10, 3]], "0-2; 1-2\n10; 3-5\n", 9),
         ([(0, 4), (0, 5), (1, 4), (1, 5)], "0-1; 4-5\n", 4),
     )
     for first, second, third in constructor_cases:
         self._testRS(first, second, third)
     # construct with copy_rangeset=False
     axis0 = RangeSet("0-10,30-40,50")
     axis1 = RangeSet("200-202")
     nd_set = RangeSetND([[axis0, axis1]], copy_rangeset=False)
     self.assertEqual(str(nd_set), "0-10,30-40,50; 200-202\n")
     self.assertEqual(len(nd_set), 69)
Ejemplo n.º 27
0
 def add_witness(self, witnessdata):
     """Wrap `witnessdata` in a Witness and register it.

     Records the witness's token range (starting at ``self.counter``)
     under its sigil, advances the counter past its tokens plus two
     marker positions, and appends its content to
     ``self.combined_string``. When the combined string is not empty, a
     `` $<n> `` marker goes in first, n being the number of witnesses
     minus one.
     """
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     new_witness = Witness(witnessdata)
     self.witnesses.append(new_witness)
     span = RangeSet()
     span.add_range(self.counter, self.counter + len(new_witness.tokens()))
     # two extra positions for the marker: "$" + number
     self.counter += len(new_witness.tokens()) + 2
     self.witness_ranges[new_witness.sigil] = span
     if self.combined_string != "":
         marker_number = len(self.witnesses) - 1
         self.combined_string += " $" + str(marker_number) + " "
     self.combined_string += new_witness.content
Ejemplo n.º 28
0
 def testIntersectionLength(self):
     """test the length of RangeSet intersections"""
     def check(left_spec, left_len, right_spec, right_len, common_len):
         # build both sets, verify their sizes, then the intersection size
         left = RangeSet(left_spec)
         self.assertEqual(len(left), left_len)
         right = RangeSet(right_spec)
         self.assertEqual(len(right), right_len)
         common = left.intersection(right)
         self.assertEqual(len(common), common_len)

     check("115-117,130,166-170,4780-4999", 229,
           "116-117,130,4781-4999", 222, 222)
     check("115-200", 86,
           "116-117,119,123-131,133,149,199", 15, 15)
     # StopIteration test
     check("115-117,130,166-170,4780-4999,5003", 230,
           "116-117,130,4781-4999", 222, 222)
     # StopIteration test2
     check("130,166-170,4780-4999", 226,
           "116-117", 2, 0)
Ejemplo n.º 29
0
    def test_mutability_2(self):
        """RangeSetND.update() with a list must not share the given RangeSets."""
        first_a = RangeSet("2-5")
        first_b = RangeSet("0-1")
        # copy_rangeset is left at its default here
        nd_set = RangeSetND([[first_a, first_b]])
        self.assertEqual(str(nd_set), "2-5; 0-1\n")

        second_a = RangeSet("6-7")
        second_b = RangeSet("2-3")
        nd_set.update([[second_a, second_b]])
        self.assertEqual(str(nd_set), "2-5; 0-1\n6-7; 2-3\n")

        # changing a source RangeSet afterwards must not alter nd_set
        second_b.add(4)
        self.assertEqual(str(second_b), "2-4")
        self.assertEqual(str(nd_set), "2-5; 0-1\n6-7; 2-3\n")
Ejemplo n.º 30
0
 def testDiscard(self):
     """test RangeSet.discard() with present, missing and odd elements"""
     ranges = RangeSet("1-100,102,105-242,800")
     self.assertEqual(len(ranges), 240)
     ranges.discard(100)
     # one element less, and the first range now stops at 99
     self.assertEqual(len(ranges), 239)
     self.assertEqual(str(ranges), "1-99,102,105-242,800")
     # 101 is not in the set: should not raise KeyError
     ranges.discard(101)
     # test remove integer-castable type (convenience)
     ranges.remove("106")
     # a non integer-castable value is passed to discard as well
     ranges.discard("foo")
Ejemplo n.º 31
0
 def _prepare_token_array(self):
     """Fill ``self.token_array`` and ``self.witness_ranges`` from the witnesses.

     Each witness gets a RangeSet starting at ``self.counter`` and
     spanning its token count, stored under its sigil; the counter then
     advances by that count plus one. Whenever the token array is
     already non-empty, a ``$<idx-1>`` marker token is appended before
     the witness's tokens.
     """
     # TODO: the lazy init should move to somewhere else
     # invalidate the cached suffix array and LCP array
     self.cached_suffix_array = None
     for position, witness in enumerate(self.witnesses):
         span = RangeSet()
         span.add_range(self.counter, self.counter + len(witness.tokens()))
         # one extra position is reserved for the marker token
         self.counter += len(witness.tokens()) + 1
         self.witness_ranges[witness.sigil] = span
         if self.token_array:
             # separate this witness from what is already in the array
             marker_name = "$" + str(position - 1)
             self.token_array.append(Token({"n": marker_name}))
         # remember: witness.tokens() is fetched once more here
         self.token_array.extend(witness.tokens())
Ejemplo n.º 32
0
 def testIsSuperSet(self):
     """test RangeSet.issuperset() and the >, >= operators"""
     bigger = RangeSet("1-100,102,105-242,800")
     self.assertEqual(len(bigger), 240)
     smaller = RangeSet("3-98,140-199,800")
     self.assertEqual(len(smaller), 157)
     # a set is a superset of itself and of the smaller set
     self.assertTrue(bigger.issuperset(bigger))
     self.assertTrue(bigger.issuperset(smaller))
     self.assertTrue(bigger >= bigger)
     self.assertTrue(bigger > smaller)
     self.assertFalse(smaller > bigger)
     # 243 is not part of the bigger set
     outsider = RangeSet("3-98,140-199,243,800")
     self.assertEqual(len(outsider), 158)
     self.assertFalse(bigger.issuperset(outsider))
     self.assertFalse(bigger > outsider)
Ejemplo n.º 33
0
 def add_witness(self, witnessdata):
     """Wrap `witnessdata` in a Witness and register it.

     Records the witness's token range (starting at ``self.counter``)
     under its sigil, advances the counter past its tokens plus two
     marker positions, and appends each token's ``token_string`` to
     ``self.combined_tokens``. From the second witness on, a ``'$'``
     entry and the number of witnesses minus one (as a string) go in
     first.
     """
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     new_witness = Witness(witnessdata)
     self.witnesses.append(new_witness)
     span = RangeSet()
     span.add_range(self.counter, self.counter + len(new_witness.tokens()))
     # two extra positions for the marker: "$" + number
     self.counter += len(new_witness.tokens()) + 2
     self.witness_ranges[new_witness.sigil] = span
     witness_total = len(self.witnesses)
     if witness_total > 1:
         self.combined_tokens.append('$')
         self.combined_tokens.append(str(witness_total - 1))
     for token in new_witness.tokens():
         text = token.token_string
         self.combined_tokens.append(text)
Ejemplo n.º 34
0
 def testIsSubSet(self):
     """test RangeSet.issubset() and the ordering operators"""
     bigger = RangeSet("1-100,102,105-242,800-900/2")
     smaller = RangeSet("3,800,802,804,888")
     # a set is a subset of itself and of the bigger set
     self.assertTrue(smaller.issubset(smaller))
     self.assertTrue(smaller.issubset(bigger))
     self.assertTrue(smaller <= bigger)
     self.assertTrue(smaller < bigger)
     self.assertTrue(bigger > smaller)
     # the reverse relations do not hold
     self.assertFalse(bigger < smaller)
     self.assertFalse(bigger <= smaller)
     self.assertFalse(smaller >= bigger)
     # since v1.6, padding is ignored when computing set operations
     unpadded = RangeSet("1-100")
     padded = RangeSet("001-100")
     self.assertTrue(unpadded.issubset(padded))
Ejemplo n.º 35
0
 def _prepare_token_array(self):
     """Populate ``self.token_array`` and ``self.witness_ranges``.

     For each witness a RangeSet starting at ``self.counter`` and
     spanning its token count is stored under its sigil, and the counter
     advances by that count plus one. A ``$<idx-1>`` marker token
     precedes the witness's tokens whenever the token array already
     holds something.
     """
     # TODO: the lazy init should move to somewhere else
     # drop the cached suffix array and LCP array
     self.cached_suffix_array = None
     for witness_index, witness in enumerate(self.witnesses):
         token_range = RangeSet()
         range_start = self.counter
         range_stop = self.counter + len(witness.tokens())
         token_range.add_range(range_start, range_stop)
         # the extra one is for the marker token
         self.counter += len(witness.tokens()) + 1
         self.witness_ranges[witness.sigil] = token_range
         if self.token_array:
             # add marker token between the previous witness and this one
             marker = Token({"n": "$" + str(witness_index - 1)})
             self.token_array.append(marker)
         # remember: witness.tokens() is fetched once more here
         self.token_array.extend(witness.tokens())
Ejemplo n.º 36
0
 def testIterator(self):
     """test iterating over a RangeSet, without and with padding"""
     expected = [1, 3, 4, 5, 6, 7, 8, 11]
     rgs = RangeSet.fromlist(["11", "3", "5-8", "1", "4"])
     seen = 0
     for seen, value in enumerate(rgs, 1):
         self.assertEqual(value, expected[seen - 1])
     # the iterator must have yielded every expected value
     self.assertEqual(seen, len(expected))
     # with padding
     rgs = RangeSet.fromlist(["011", "003", "005-008", "001", "004"])
     seen = 0
     for seen, value in enumerate(rgs, 1):
         self.assertTrue(type(value) is int)
         self.assertEqual(value, expected[seen - 1])
     self.assertEqual(seen, len(expected))
Ejemplo n.º 37
0
 def test_blocks_splitting_token_case(self):
     """The shared run "a c b" must be reported as one block."""
     collation = Collation()
     for sigil, text in (("W1", "a c b c"), ("W2", "a c b")):
         collation.add_plain_witness(sigil, text)
     scorer = Scorer(collation)
     found = scorer._get_non_overlapping_repeating_blocks()
     expected_block = Block(RangeSet("0-2, 5-7"))  # a c b
     self.assertIn(expected_block, found)
Ejemplo n.º 38
0
 def testIterator(self):
     """test the RangeSet iterator on plain and padded input"""
     wanted = [1, 3, 4, 5, 6, 7, 8, 11]
     plain = RangeSet.fromlist(["11", "3", "5-8", "1", "4"])
     count = 0
     for item in plain:
         self.assertEqual(item, wanted[count])
         count = count + 1
     # all expected values were produced
     self.assertEqual(count, len(wanted))
     # with padding
     padded = RangeSet.fromlist(["011", "003", "005-008", "001", "004"])
     count = 0
     for item in padded:
         # iteration yields integers even for padded input
         self.assertTrue(type(item) is int)
         self.assertEqual(item, wanted[count])
         count = count + 1
     self.assertEqual(count, len(wanted))
Ejemplo n.º 39
0
 def testStringIterator(self):
     """test RangeSet.striter(), without and with padding"""
     expected = [1, 3, 4, 5, 6, 7, 8, 11]
     rgs = RangeSet.fromlist(["11", "3", "5-8", "1", "4"])
     seen = 0
     for seen, text in enumerate(rgs.striter(), 1):
         self.assertEqual(text, str(expected[seen - 1]))
     # the iterator must have yielded every expected value
     self.assertEqual(seen, len(expected))
     # with padding
     rgs = RangeSet.fromlist(["011", "003", "005-008", "001", "004"])
     seen = 0
     for seen, text in enumerate(rgs.striter(), 1):
         self.assertTrue(type(text) is str)
         # padded values come back zero-filled to three digits
         self.assertEqual(text, "%0*d" % (3, expected[seen - 1]))
     self.assertEqual(seen, len(expected))
Ejemplo n.º 40
0
 def test_non_overlapping_blocks_black_cat(self):
     """Two identical witnesses must yield exactly one block."""
     collation = Collation()
     for sigil in ("W1", "W2"):
         collation.add_plain_witness(sigil, "the black cat")
     scorer = Scorer(collation)
     found = scorer._get_non_overlapping_repeating_blocks()
     expected = [Block(RangeSet("0-2, 4-6"))]
     self.assertEqual(expected, found)
Ejemplo n.º 41
0
 def testStringIterator(self):
     """test the RangeSet string iterator striter() on plain and padded input"""
     wanted = [1, 3, 4, 5, 6, 7, 8, 11]
     plain = RangeSet.fromlist(["11", "3", "5-8", "1", "4"])
     count = 0
     for item in plain.striter():
         self.assertEqual(item, str(wanted[count]))
         count = count + 1
     # all expected values were produced
     self.assertEqual(count, len(wanted))
     # with padding
     padded = RangeSet.fromlist(["011", "003", "005-008", "001", "004"])
     count = 0
     for item in padded.striter():
         self.assertTrue(type(item) is str)
         # padded values come back zero-filled to three digits
         self.assertEqual(item, "%0*d" % (3, wanted[count]))
         count = count + 1
     self.assertEqual(count, len(wanted))
Ejemplo n.º 42
0
 def testConstructorIterate(self):
     """test building a RangeSet from a list, a set and another RangeSet"""
     indices = [3, 5, 6, 7, 8, 1]
     # from list
     from_list = RangeSet(indices)
     self.assertEqual(str(from_list), "1,3,5-8")
     self.assertEqual(len(from_list), 6)
     # the result can still be extended
     from_list.add(10)
     self.assertEqual(str(from_list), "1,3,5-8,10")
     self.assertEqual(len(from_list), 7)
     # from set
     from_set = RangeSet(set(indices))
     self.assertEqual(str(from_set), "1,3,5-8")
     self.assertEqual(len(from_set), 6)
     # from RangeSet
     source = RangeSet("1,3,5-8")
     from_rangeset = RangeSet(source)
     self.assertEqual(str(from_rangeset), "1,3,5-8")
     self.assertEqual(len(from_rangeset), 6)
Ejemplo n.º 43
0
 def testUpdate(self):
     """test RangeSet.update() and RangeSet.union_update()"""
     base_spec = "1-100,102,105-242,800"
     merged_spec = "1-100,102,105-800,1924-1984"
     target = RangeSet(base_spec)
     self.assertEqual(len(target), 240)
     extra = RangeSet("243-799,1924-1984")
     self.assertEqual(len(extra), 618)
     target.update(extra)
     # the type and (absent) padding are unchanged by the update
     self.assertEqual(type(target), RangeSet)
     self.assertEqual(target.padding, None)
     self.assertEqual(len(target), 240 + 618)
     self.assertEqual(str(target), merged_spec)
     # union_update() must give the same result from a fresh set
     target = RangeSet(base_spec)
     target.union_update(extra)
     self.assertEqual(len(target), 240 + 618)
     self.assertEqual(str(target), merged_spec)
Ejemplo n.º 44
0
 def testCopy(self):
     """test that RangeSet.copy() returns independent copies"""
     source_spec = "115-117,130,166-170,4780-4999"
     shrunk_spec = "115-117,130,167-170,4780-4999"
     original = RangeSet(source_spec)
     self.assertEqual(len(original), 229)
     self.assertEqual(str(original), source_spec)
     shrunk = original.copy()
     grown = original.copy()
     self.assertEqual(original, shrunk)  # content equality
     # removing from one copy leaves the original alone
     shrunk.remove(166)
     self.assertEqual(len(original), len(shrunk) + 1)
     self.assertNotEqual(original, shrunk)
     self.assertEqual(str(original), source_spec)
     self.assertEqual(str(shrunk), shrunk_spec)
     # adding to the other copy affects neither of the other two
     grown.update(RangeSet("118"))
     self.assertNotEqual(original, grown)
     self.assertNotEqual(shrunk, grown)
     self.assertEqual(len(original) + 1, len(grown))
     self.assertEqual(str(original), source_spec)
     self.assertEqual(str(shrunk), shrunk_spec)
     self.assertEqual(str(grown), "115-118,130,166-170,4780-4999")
Ejemplo n.º 45
0
 def testCopy(self):
     """test RangeSet.copy() independence from its source"""
     base = RangeSet("115-117,130,166-170,4780-4999")
     self.assertEqual(len(base), 229)
     self.assertEqual(str(base), "115-117,130,166-170,4780-4999")
     copy_a = base.copy()
     copy_b = base.copy()
     # a fresh copy compares equal to its source (content equality)
     self.assertEqual(base, copy_a)
     copy_a.remove(166)
     self.assertEqual(len(base), len(copy_a) + 1)
     self.assertNotEqual(base, copy_a)
     # only the modified copy lost 166
     self.assertEqual(str(base), "115-117,130,166-170,4780-4999")
     self.assertEqual(str(copy_a), "115-117,130,167-170,4780-4999")
     copy_b.update(RangeSet("118"))
     self.assertNotEqual(base, copy_b)
     self.assertNotEqual(copy_a, copy_b)
     self.assertEqual(len(base) + 1, len(copy_b))
     # all three sets now differ from each other
     self.assertEqual(str(base), "115-117,130,166-170,4780-4999")
     self.assertEqual(str(copy_a), "115-117,130,167-170,4780-4999")
     self.assertEqual(str(copy_b), "115-118,130,166-170,4780-4999")
Ejemplo n.º 46
0
 def _iterbase(self):
     """Yield one NodeSetBase per item produced by ``self._iter()``.

     The range part is None when the item has no index vector, a
     RangeSet for a one-element vector and a RangeSetND otherwise.
     """
     for pat, ivec, pad, autostep in self._iter():
         if ivec is None:
             # 'no node index' case
             index_set = None
         else:
             assert len(ivec) > 0
             if len(ivec) == 1:
                 index_set = RangeSet.fromone(ivec[0], pad[0] or 0, autostep)
             else:
                 index_set = RangeSetND([ivec], pad, autostep)
         yield NodeSetBase(pat, index_set)
Ejemplo n.º 47
0
 def testFromOneConstructor(self):
     """test the RangeSet.fromone() constructor with ints and slices"""
     single = RangeSet.fromone(42)
     self.assertEqual(str(single), "42")
     self.assertEqual(len(single), 1)
     # also support slice object (v1.6+)
     from_stop = RangeSet.fromone(slice(42))
     self.assertEqual(str(from_stop), "0-41")
     self.assertEqual(len(from_stop), 42)
     # a slice without a stop value is rejected
     self.assertRaises(ValueError, RangeSet.fromone, slice(12, None))
     one_wide = RangeSet.fromone(slice(42, 43))
     self.assertEqual(str(one_wide), "42")
     self.assertEqual(len(one_wide), 1)
     six_wide = RangeSet.fromone(slice(42, 48))
     self.assertEqual(str(six_wide), "42-47")
     self.assertEqual(len(six_wide), 6)
     # slice with a step: folded only once autostep is set
     stepped = RangeSet.fromone(slice(42, 57, 2))
     self.assertEqual(str(stepped), "42,44,46,48,50,52,54,56")
     stepped.autostep = 3
     self.assertEqual(str(stepped), "42-56/2")
     self.assertEqual(len(stepped), 8)
Ejemplo n.º 48
0
    def test_mutability_1(self):
        """Updating a RangeSetND from another one must not alias RangeSets."""
        axis_a = RangeSet("2-5")
        axis_b = RangeSet("0-1")
        # copy_rangeset is left at its default here
        target = RangeSetND([[axis_a, axis_b]])
        self.assertEqual(str(target), "2-5; 0-1\n")

        axis_c = RangeSet("6-7")
        axis_d = RangeSet("2-3")
        donor = RangeSetND([[axis_c, axis_d]])
        target.update(donor)
        self.assertEqual(str(target), "2-5; 0-1\n6-7; 2-3\n")

        # check mutability safety: no source RangeSet was modified
        sources = ((axis_a, "2-5"), (axis_b, "0-1"),
                   (axis_c, "6-7"), (axis_d, "2-3"))
        for rangeset, text in sources:
            self.assertEqual(str(rangeset), text)

        # reverse check: modifying the sources leaves target unchanged
        axis_b.add(2)
        self.assertEqual(str(axis_b), "0-2")
        axis_d.add(4)
        self.assertEqual(str(axis_d), "2-4")
        self.assertEqual(str(target), "2-5; 0-1\n6-7; 2-3\n")

        # donor picks up the added element only through an explicit update
        self.assertEqual(str(donor), "6-7; 2-3\n")
        donor.update([[axis_c, axis_d]])
        self.assertEqual(str(donor), "6-7; 2-4\n")

        self.assertEqual(str(target), "2-5; 0-1\n6-7; 2-3\n")
Ejemplo n.º 49
0
 def testUnion(self):
     """test RangeSet.union() and the | operator"""
     base = RangeSet("1-100,102,105-242,800")
     self.assertEqual(len(base), 240)
     disjoint = RangeSet("243-799,1924-1984")
     self.assertEqual(len(disjoint), 618)
     by_method = base.union(disjoint)
     # the result is a plain RangeSet without padding
     self.assertEqual(type(by_method), RangeSet)
     self.assertEqual(by_method.padding, None)
     self.assertEqual(len(by_method), 240 + 618)
     self.assertEqual(str(by_method), "1-100,102,105-800,1924-1984")
     by_operator = base | disjoint
     self.assertEqual(len(by_operator), 240 + 618)
     self.assertEqual(str(by_operator), "1-100,102,105-800,1924-1984")
     # test with overlap
     overlapping = RangeSet("200-799")
     by_method = base.union(overlapping)
     self.assertEqual(len(by_method), 797)
     self.assertEqual(str(by_method), "1-100,102,105-800")
     by_operator = base | overlapping
     self.assertEqual(len(by_operator), 797)
     self.assertEqual(str(by_operator), "1-100,102,105-800")
Ejemplo n.º 50
0
 def nsiter(self):
     """Yield a new single-node object of this class per ``self._iter()`` item.

     The range part passed to ``_add_new`` is None when the item has no
     index vector, a RangeSet for a one-element vector and a RangeSetND
     otherwise.
     """
     for pat, ivec, pad, autostep in self._iter():
         single = self.__class__()
         if ivec is None:
             index_set = None
         elif len(ivec) == 1:
             index_set = RangeSet.fromone(ivec[0], pad[0] or 0)
         else:
             index_set = RangeSetND([ivec], None, autostep)
         single._add_new(pat, index_set)
         yield single
Ejemplo n.º 51
0
    def testIntersectStep(self):
        """test RangeSet.intersection_update() on stepped ranges"""
        def intersect(left_spec, right_spec, **options):
            # build both sets with the same options, intersect in place
            left = RangeSet(left_spec, **options)
            right = RangeSet(right_spec, **options)
            left.intersection_update(right)
            return left

        # both steps land on the same (even) values
        result = intersect("4-34/2", "28-42/2")
        self.assertEqual(str(result), "28,30,32,34")
        self.assertEqual(len(result), 4)

        # even values against odd values: nothing in common
        result = intersect("4-34/2", "27-42/2")
        self.assertEqual(str(result), "")
        self.assertEqual(len(result), 0)

        # with autostep set, the result is displayed as a stepped range
        result = intersect("2-60/3", "3-50/2", autostep=3)
        self.assertEqual(str(result), "5-47/6")
        self.assertEqual(len(result), 8)
Ejemplo n.º 52
0
 def _prepare_token_array(self):
     """Fill ``self.token_array`` and ``self.witness_ranges`` from the witnesses.

     For every witness, in order:

     * a RangeSet starting at ``self.counter`` and spanning the witness's
       token count is stored in ``self.witness_ranges`` under its sigil;
     * ``self.counter`` advances by the token count plus one (the marker);
     * each token gets ``_sigil`` and ``_token_array_position`` entries in
       its ``token_data``;
     * the tokens and then a ``$<idx>`` marker token are appended to
       ``self.token_array``.

     The marker appended after the last witness is removed again. When
     there are no witnesses nothing is appended and nothing is removed
     (previously the unconditional ``pop()`` raised IndexError on an empty
     token array, or dropped an unrelated token from a non-empty one).
     """
     # TODO: the lazy init should move to somewhere else
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     token_array_position = 0
     marker_appended = False
     for idx, witness in enumerate(self.witnesses):
         witness_range = RangeSet()
         witness_range.add_range(self.counter, self.counter + len(witness.tokens()))
         # the extra one is for the marker token
         self.counter += len(witness.tokens()) + 1
         self.witness_ranges[witness.sigil] = witness_range
         # remember: witness.tokens() is fetched twice below
         sigil = witness.sigil
         for token in witness.tokens():
             token.token_data['_sigil'] = sigil
             token.token_data['_token_array_position'] = token_array_position
             token_array_position += 1
         self.token_array.extend(witness.tokens())
         # add marker token
         self.token_array.append(Token({"n": '$' + str(idx), '_sigil': sigil}))
         # the marker occupies a position in the token array as well
         token_array_position += 1
         marker_appended = True
     if marker_appended:
         self.token_array.pop()  # remove last marker
Ejemplo n.º 53
0
    def get_non_overlapping_repeating_blocks(self):
        """Pick repeating blocks whose ranges do not overlap one another.

        Candidate LCP intervals come from the collation's extended suffix
        array.  After filtering they are ranked and claimed one by one
        against the set of positions that earlier blocks already occupy.
        A candidate that raises PartialOverlapException is shortened by one
        and retried for as long as its minimum block length exceeds 1.

        Returns the list of selected Block objects, in selection order.
        """
        suffix_array = self.collation.to_extended_suffix_array()
        candidates = suffix_array.split_lcp_array_into_intervals()
        self.filter_potential_blocks(candidates)
        # step 3: rank the candidates, highest first, on
        #   1. depth (number of repetitions),
        #   2. length of the LCP interval,
        #   3. parent LCP interval occurrences.
        priority = attrgetter("number_of_occurrences", "minimum_block_length", "number_of_siblings")
        ranked = sorted(candidates, key=priority, reverse=True)
        # step 4: select the definitive blocks
        occupied = RangeSet()
        real_blocks = []

        def claim(block):
            # Reserve the part of ``block`` that is still free.  Returns True
            # when a non-empty range was reserved, False otherwise.
            free_range = block.calculate_non_overlapping_range_with(occupied)
            if not free_range:
                return False
            occupied.union_update(free_range)
            real_blocks.append(Block(free_range))
            return True

        for candidate in ranked:
            try:
                claim(candidate)
            except PartialOverlapException:
                # conflict: retry with a different length, one less each time
                while candidate.minimum_block_length > 1:
                    for idx in range(candidate.start + 1, candidate.end + 1):
                        candidate.LCP[idx] -= 1
                    candidate.length -= 1
                    try:
                        if claim(candidate):
                            break
                    except PartialOverlapException:
                        # still conflicting at this length; shorten again
                        continue
        return real_blocks
Ejemplo n.º 54
0
 def testBinarySanityCheckNotImplementedSubtle(self):
     """test RangeSet binary sanity check (NotImplemented subtle)"""
     rg1 = RangeSet("1-5")
     rg2 = "4-6"
     # Called directly, the special methods must hand back NotImplemented
     # for an operand that is a plain string.
     self.assertEqual(rg1.__and__(rg2), NotImplemented)
     self.assertEqual(rg1.__or__(rg2), NotImplemented)
     self.assertEqual(rg1.__sub__(rg2), NotImplemented)
     self.assertEqual(rg1.__xor__(rg2), NotImplemented)
     # Should implicitly raise TypeError if the real operator version is
     # invoked.  Each operator is wrapped in a lambda so it can be evaluated
     # lazily inside the try block, and a failure names the operator.
     operations = (
         ("&", lambda: rg1 & rg2),
         ("|", lambda: rg1 | rg2),
         ("-", lambda: rg1 - rg2),
         ("^", lambda: rg1 ^ rg2),
     )
     for symbol, operation in operations:
         try:
             operation()
         except TypeError:
             continue
         self.fail("TypeError not raised for %s" % symbol)
Ejemplo n.º 55
0
 def testSplit(self):
     """test RangeSet.split()"""
     # Splitting an empty rangeset produces nothing
     empty = RangeSet()
     self.assertEqual(len(list(empty.split(2))), 0)

     # Fewer elements than requested parts
     single = RangeSet("1")
     self.assertEqual((RangeSet("1"),), tuple(single.split(2)))

     # Element count is an exact multiple of the number of parts
     six = RangeSet("1-6")
     thirds = (RangeSet("1-2"), RangeSet("3-4"), RangeSet("5-6"))
     self.assertEqual(thirds, tuple(six.split(3)))

     # Limit results: as many parts as elements, then one more
     four = RangeSet("0-3")
     for nparts in (4, 5):
         singletons = (RangeSet("0"), RangeSet("1"),
                       RangeSet("2"), RangeSet("3"))
         self.assertEqual(singletons, tuple(four.split(nparts)))
Ejemplo n.º 56
0
    def _scan_string_single(self, nsstr, autostep):
        """Scan a single node name.

        Returns a ``(pat, rangesets)`` tuple: ``pat`` is the node name with
        each numeric index replaced by ``%s``, and ``rangesets`` holds one
        single-index RangeSet per replaced index, in order.

        Raises NodeSetParseError for an empty or unparsable name and
        NodeSetParseRangeError for an index above 1e100.
        """
        # surrounding whitespace is not significant
        node = nsstr.strip()
        if not node:
            raise NodeSetParseError(nsstr, "empty node name")

        # single node parsing: (prefix, index) pairs, last match dropped
        matches = self.base_node_re.finditer(node)
        pairs = [match.groups() for match in matches][:-1]
        if not pairs:
            raise NodeSetParseError(node, "parse error")

        # pfx+sfx cannot be empty
        if len(pairs) == 1 and len(pairs[0][0]) == 0:
            raise NodeSetParseError(node, "empty node name")

        pieces = []
        rangesets = []
        for prefix, digits in pairs:
            if not digits:
                # no index captured: keep the text as is
                pieces.append(prefix)
                continue
            # optimization: process single index padding directly
            number = int(digits)
            if number != 0:
                # padded only when written with leading zero(s)
                pad = len(digits) if digits.startswith("0") else 0
            else:
                # "0" alone is unpadded; "00", "000", ... are padded
                pad = len(digits) if len(digits) > 1 else 0
            if number > 1e100:
                raise NodeSetParseRangeError(
                    RangeSetParseError(digits, "invalid rangeset index"))
            # optimization: use numerical RangeSet constructor
            pieces.append("%s%%s" % prefix)
            rangesets.append(RangeSet.fromone(number, pad, autostep))
        return "".join(pieces), rangesets
Ejemplo n.º 57
0
 def testUpdate(self):
     """test RangeSet.update()"""
     base_text = "1-100,102,105-242,800"
     merged_text = "1-100,102,105-800,1924-1984"
     base_len, extra_len = 240, 618

     target = RangeSet(base_text)
     self.assertEqual(len(target), base_len)
     extra = RangeSet("243-799,1924-1984")
     self.assertEqual(len(extra), extra_len)

     # update() works in place and keeps type and padding
     target.update(extra)
     self.assertEqual(type(target), RangeSet)
     self.assertEqual(target.padding, None)
     self.assertEqual(len(target), base_len + extra_len)
     self.assertEqual(str(target), merged_text)

     # union_update() on a fresh rangeset must give the same result
     target = RangeSet(base_text)
     target.union_update(extra)
     self.assertEqual(len(target), base_len + extra_len)
     self.assertEqual(str(target), merged_text)
Ejemplo n.º 58
0
    def testFolding(self):
        """test RangeSet folding conditions"""
        folded = RangeSet(
            "112,114-117,119,121,130,132,134,136,138,139-141,144,147-148",
            autostep=6)
        self.assertEqual(
            str(folded),
            "112,114-117,119,121,130,132,134,136,138-141,144,147-148")
        # changing autostep on the existing object changes how it is displayed
        folded.autostep = 5
        self.assertEqual(
            str(folded),
            "112,114-117,119,121,130-138/2,139-141,144,147-148")

        # same pattern built with different constructor keyword arguments
        pattern = "1,3-4,6,8"
        cases = (
            ({}, "1,3-4,6,8"),
            ({"autostep": 4}, "1,3-4,6,8"),
            ({"autostep": 2}, "1,3,4-8/2"),
            ({"autostep": 3}, "1,3,4-8/2"),
        )
        for kwargs, expected in cases:
            self.assertEqual(str(RangeSet(pattern, **kwargs)), expected)

        # empty set
        self.assertEqual(str(RangeSet(autostep=3)), "")
Ejemplo n.º 59
0
 def testIsSubSet(self):
     """test RangeSet.issubset()"""
     bigger = RangeSet("1-100,102,105-242,800-900/2")
     smaller = RangeSet("3,800,802,804,888")

     # method form, including a set against itself
     self.assertTrue(smaller.issubset(smaller))
     self.assertTrue(smaller.issubset(bigger))

     # operator forms that must hold
     self.assertTrue(smaller <= bigger)
     self.assertTrue(smaller < bigger)
     self.assertTrue(bigger > smaller)

     # operator forms that must not hold
     self.assertFalse(bigger < smaller)
     self.assertFalse(bigger <= smaller)
     self.assertFalse(smaller >= bigger)

     # since v1.6, padding is ignored when computing set operations
     unpadded = RangeSet("1-100")
     padded = RangeSet("001-100")
     self.assertTrue(unpadded.issubset(padded))
Ejemplo n.º 60
0
 def testSlices(self):
     """test RangeSet.slices()"""
     # an empty rangeset has no slices
     empty = RangeSet()
     self.assertEqual(len(empty), 0)
     self.assertEqual(len(list(empty.slices())), 0)

     text = "1-7/2,8-12,3000-3019"

     # Without autostep
     plain = RangeSet(text)
     self.assertEqual(plain.autostep, None)
     self.assertEqual(len(plain), 29)
     self.assertEqual(list(plain.slices()), [
         slice(1, 2, 1),
         slice(3, 4, 1),
         slice(5, 6, 1),
         slice(7, 13, 1),
         slice(3000, 3020, 1),
     ])

     # With autostep
     stepped = RangeSet(text, autostep=2)
     self.assertEqual(len(stepped), 29)
     self.assertEqual(stepped.autostep, 2)
     self.assertEqual(list(stepped.slices()), [
         slice(1, 8, 2),
         slice(8, 13, 1),
         slice(3000, 3020, 1),
     ])