def test_ixor(self):
    """Check in-place symmetric difference (``^=``) between two RangeSets."""
    left = RangeSet("1,3-9,14-21,30-39,42")
    right = RangeSet("2-5,10-32,35,40-41")

    left ^= right

    # Only the indexes held by exactly one of the operands must remain.
    self.assertEqual(len(left), 27)
    self.assertEqual(str(left), "1-2,6-13,22-29,33-34,36-42")
def test_isub(self):
    """Check in-place difference (``-=``) between two RangeSets."""
    left = RangeSet("1,3-9,14-21,30-39,42")
    right = RangeSet("2-5,10-32,35,40-41")

    left -= right

    # Every index also present in the right operand must have been dropped.
    self.assertEqual(len(left), 12)
    self.assertEqual(str(left), "1,6-9,33-34,36-39,42")
def test_ior(self):
    """Check in-place union (``|=``) between two RangeSets."""
    left = RangeSet("1,3-9,14-21,30-39,42")
    right = RangeSet("2-5,10-32,35,40-41")

    left |= right

    # Together the two operands cover 1 to 42 without any hole.
    self.assertEqual(len(left), 42)
    self.assertEqual(str(left), "1-42")
def test_iand(self):
    """Check in-place intersection (``&=``) between two RangeSets."""
    left = RangeSet("1,3-9,14-21,30-39,42")
    right = RangeSet("2-5,10-32,35,40-41")

    left &= right

    # Only the indexes shared by both operands must remain.
    self.assertEqual(len(left), 15)
    self.assertEqual(str(left), "3-5,14-21,30-32,35")
def test_mutability_1(self):
    """Check that RangeSetND and the RangeSets given to it stay independent.

    Updating a RangeSetND must not alter the RangeSet objects it was built
    from, and modifying those RangeSets afterwards must not alter the
    RangeSetND.
    """
    axis0_a = RangeSet("2-5")
    axis1_a = RangeSet("0-1")
    nd_main = RangeSetND([[axis0_a, axis1_a]])  # copy_rangeset left at its default
    self.assertEqual(str(nd_main), "2-5; 0-1\n")

    axis0_b = RangeSet("6-7")
    axis1_b = RangeSet("2-3")
    nd_other = RangeSetND([[axis0_b, axis1_b]])  # copy_rangeset left at its default
    nd_main.update(nd_other)
    self.assertEqual(str(nd_main), "2-5; 0-1\n6-7; 2-3\n")

    # The source RangeSets must be untouched by the update above.
    untouched = (
        (axis0_a, "2-5"),
        (axis1_a, "0-1"),
        (axis0_b, "6-7"),
        (axis1_b, "2-3"),
    )
    for source, expected in untouched:
        self.assertEqual(str(source), expected)

    # Reverse check: changing the sources must not leak into the ND sets.
    axis1_a.add(2)
    self.assertEqual(str(axis1_a), "0-2")
    axis1_b.add(4)
    self.assertEqual(str(axis1_b), "2-4")
    self.assertEqual(str(nd_main), "2-5; 0-1\n6-7; 2-3\n")
    self.assertEqual(str(nd_other), "6-7; 2-3\n")

    # An explicit update with the modified sources is picked up by the
    # updated object only.
    nd_other.update([[axis0_b, axis1_b]])
    self.assertEqual(str(nd_other), "6-7; 2-4\n")
    self.assertEqual(str(nd_main), "2-5; 0-1\n6-7; 2-3\n")
def calculate_non_overlapping_range_with(self, occupied):
    """Return the part of this block's range that does not hit ``occupied``.

    The potential range is built from every value yielded by
    ``self.block_occurrences()``, each one spanning
    ``self.minimum_block_length`` positions.

    :param occupied: ranges that are already taken; passed as-is to
        ``RangeSet.intersection``.
    :return: the complete potential range when nothing overlaps, a trimmed
        ``RangeSet`` when the overlap is partial, or ``None`` when nothing
        is left after trimming.
    :raises PartialOverlapException: when the first remaining slice spans
        more positions than ``self.minimum_block_length``.
    """
    # Convert block occurrences into ranges.
    potential_block_range = RangeSet()
    for occurrence in self.block_occurrences():
        potential_block_range.add_range(
            occurrence, occurrence + self.minimum_block_length)

    # Check the intersection with the already occupied ranges.
    block_intersection = potential_block_range.intersection(occupied)
    if not block_intersection:
        # No overlap: the complete block range is usable.
        return potential_block_range

    # There is overlap with the occupied range. The start of every
    # overlapping slice is loop-invariant, so compute the list only once.
    overlap_starts = [piece[0] for piece in block_intersection.contiguous()]

    real_block_range = RangeSet()
    for candidate in potential_block_range.contiguous():
        start = candidate[0]
        # Find-first: stop at the first overlap start located at or after
        # the start of this candidate instead of collecting all of them.
        stop = next((pos for pos in overlap_starts if pos >= start), None)
        # start == stop means the candidate is occupied from its very first
        # position, so nothing of it can be kept.
        if stop is not None and stop != start:
            real_block_range.add_range(start, stop)

    if not real_block_range:
        # There is complete overlap, so return None.
        return None

    # Sanity check: the first slice may not be larger than the potential
    # block length.
    first_range = next(real_block_range.contiguous())
    if first_range[-1] - first_range[0] + 1 > self.minimum_block_length:
        raise PartialOverlapException()
    return real_block_range
def part_one(path='data.txt'):
    """Sum every nearby-ticket value that fits none of the field ranges.

    The input file is expected to hold three sections separated by a blank
    line: the field rules (``name: range or range``), the "your ticket"
    section (unused here) and the nearby tickets, whose first line is a
    header and is skipped.

    :param path: file to read; defaults to ``data.txt`` as before.
    :return: the sum of all values not contained in any field range.
    """
    with open(path, 'r') as f:
        data = f.read()

    # Strip first so a trailing newline does not produce an empty ticket
    # line (or a fourth, empty section) further down.
    fields, _your_ticket, nearby_tickets = data.strip().split('\n\n')

    # Merge the valid ranges of every field into a single RangeSet.
    all_ranges = RangeSet()
    for field_line in fields.split('\n'):
        _field_name, field_ranges = field_line.split(':')
        for field_range in field_ranges.split('or'):
            all_ranges = all_ranges.union(RangeSet(field_range))

    # Nearby tickets: the first line of the section is its header.
    part_one_answer = 0
    for ticket in nearby_tickets.split('\n')[1:]:
        if not ticket.strip():
            # Blank line: there is no value to check.
            continue
        for field_value in ticket.split(','):
            if field_value not in all_ranges:
                part_one_answer += int(field_value)
    return part_one_answer
def test_witness_ranges_hermans_case(self):
    """Check the token range the collation reports for each witness.

    W1 holds 15 tokens and is expected at positions 0-14; W2 holds 13
    tokens and is expected at positions 17-29.
    """
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(RangeSet("0-14"),
                     collation.get_range_for_witness("W1"))
    self.assertEqual(RangeSet("17-29"),
                     collation.get_range_for_witness("W2"))
def test_vectors(self):
    """Check RangeSetND.vectors() output, string form and length."""
    nd_set = RangeSetND([["0-10", "1-2"], ["5-60", "2"]])

    # vectors() should perform automatic folding
    expected_vectors = [
        [RangeSet("0-60"), RangeSet("2")],
        [RangeSet("0-10"), RangeSet("1")],
    ]
    self.assertEqual(expected_vectors, list(nd_set.vectors()))
    self.assertEqual(str(nd_set), "0-60; 2\n0-10; 1\n")
    # 61 elements for the first vector plus 11 for the second one
    self.assertEqual(len(nd_set), 72)
def test_non_overlapping_blocks_Hermans(self):
    """Check the repeating blocks found by a Scorer built from a collation."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    scorer = Scorer(collation)

    found = scorer._get_non_overlapping_repeating_blocks()

    expected_ranges = (
        "0-8, 17-25",    # a b c d F g h i !
        "11-14, 26-29",  # q r s t
    )
    for pattern in expected_ranges:
        self.assertIn(Block(RangeSet(pattern)), found)
def test_non_overlapping_blocks_Hermans(self):
    """Check the repeating blocks found by a Scorer built from a token index."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    token_index = TokenIndex.create_token_index(collation)
    scorer = Scorer(token_index)

    found = scorer._get_non_overlapping_repeating_blocks()

    expected_ranges = (
        "0-8, 16-24",    # a b c d F g h i !
        "11-14, 25-28",  # q r s t
    )
    for pattern in expected_ranges:
        self.assertIn(Block(RangeSet(pattern)), found)
def testAddRange(self):
    """Check RangeSet.add_range() with and without autostep."""

    def check(rangeset, size, text):
        # Length first, then string form, for every step below.
        self.assertEqual(len(rangeset), size)
        self.assertEqual(str(rangeset), text)

    # Without autostep, stepped ranges are displayed element by element.
    rset = RangeSet()
    rset.add_range(1, 100, 1)
    check(rset, 99, "1-99")
    rset.add_range(40, 101, 1)
    check(rset, 100, "1-100")
    rset.add_range(399, 423, 2)
    check(rset, 112,
          "1-100,399,401,403,405,407,409,411,413,415,417,419,421")

    # With autostep...
    rset = RangeSet(autostep=3)
    rset.add_range(1, 100, 1)
    self.assertEqual(rset.autostep, 3)
    check(rset, 99, "1-99")
    rset.add_range(40, 101, 1)
    check(rset, 100, "1-100")
    rset.add_range(399, 423, 2)
    check(rset, 112, "1-100,399-421/2")

    # Bound checks
    rset = RangeSet("1-30", autostep=2)
    check(rset, 30, "1-30")
    self.assertEqual(rset.autostep, 2)
    rset.add_range(32, 35, 1)
    check(rset, 33, "1-30,32-34")
    rset.add_range(31, 32, 1)
    check(rset, 34, "1-34")

    rset = RangeSet("1-30/4")
    check(rset, 8, "1,5,9,13,17,21,25,29")
    rset.add_range(30, 32, 1)
    check(rset, 10, "1,5,9,13,17,21,25,29-31")
    rset.add_range(40, 65, 10)
    check(rset, 13, "1,5,9,13,17,21,25,29-31,40,50,60")

    rset = RangeSet("1-30", autostep=3)
    rset.add_range(40, 65, 10)
    self.assertEqual(rset.autostep, 3)
    check(rset, 33, "1-29,30-60/10")

    # One: a range holding a single element
    rset.add_range(103, 104)
    check(rset, 34, "1-29,30-60/10,103")

    # Zero: an empty range is rejected
    self.assertRaises(AssertionError, rset.add_range, 103, 103)
def test_pickle_current(self):
    """Check that a RangeSet survives a pickle round trip (current version)."""
    pickled = pickle.dumps(RangeSet("1-100"))
    self.assertNotEqual(pickled, None)

    restored = pickle.loads(pickled)
    self.assertEqual(restored, RangeSet("1-100"))
    self.assertEqual(str(restored), "1-100")

    # Indexing must still work on the restored object.
    for position, value in ((0, 1), (1, 2), (-1, 100)):
        self.assertEqual(restored[position], value)
def get_compute_rangeset(self):
    """Return a RangeSet made of the ``id`` group of every matching child.

    Each child returned by ``self.get_children()`` is matched against
    ``self._comp_regex``; children that do not match are ignored.
    """
    compute_ids = RangeSet()
    for child in self.get_children():
        found = re.match(self._comp_regex, child)
        if not found:
            continue
        child_id = found.groupdict()['id']
        compute_ids.union_update(RangeSet(str(child_id)))
    return compute_ids
def test_blocks_failing_transposition_use_case_old_algorithm(self):
    """Check the exact, ordered block list for a transposition case."""
    collation = Collation()
    collation.add_plain_witness("W1", "the cat and the dog")
    collation.add_plain_witness("W2", "the dog and the cat")
    scorer = Scorer(collation)

    found = scorer._get_non_overlapping_repeating_blocks()

    # The order of the expected blocks is part of the assertion.
    expected = [
        Block(RangeSet(pattern))
        for pattern in ("0-1, 9-10", "3-4, 6-7", "2, 8")
    ]
    self.assertEqual(expected, found)
def test_witness_ranges_hermans_case(self):
    """Check the token range a prepared TokenIndex reports per witness.

    W1 holds 15 tokens and is expected at positions 0-14; W2 holds 13
    tokens and is expected at positions 16-28.
    """
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    token_index = TokenIndex(collation.witnesses)
    token_index.prepare()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(RangeSet("0-14"),
                     token_index.get_range_for_witness("W1"))
    self.assertEqual(RangeSet("16-28"),
                     token_index.get_range_for_witness("W2"))
def test_2(self):
    """Check the repeating blocks found across three witnesses."""
    collation = Collation()
    collation.add_plain_witness("W1", "in the in the bleach")
    collation.add_plain_witness("W2", "in the in the bleach in the")
    collation.add_plain_witness("W3", "in the in the bleach in the")
    token_index = TokenIndex.create_token_index(collation)
    scorer = Scorer(token_index)

    found = scorer._get_non_overlapping_repeating_blocks()

    expected_ranges = (
        "0-4, 6-10, 14-18",  # in the in the bleach
        "11-12, 19-20",      # in the
    )
    for pattern in expected_ranges:
        self.assertIn(Block(RangeSet(pattern)), found)
def test_blocks_Hermans_case_three_witnesses(self):
    """Check the blocks found for three witnesses through a token index."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
    token_index = TokenIndex.create_token_index(collation)
    scorer = Scorer(token_index)

    found = scorer._get_non_overlapping_repeating_blocks()

    expected_ranges = (
        "0-3, 16-19, 30-33",    # a b c d
        "5-7, 21-23, 35-37",    # g h i
        "10-14, 24-28, 38-42",  # ! q r s t
        "4, 20",                # F
    )
    for pattern in expected_ranges:
        self.assertIn(Block(RangeSet(pattern)), found)
def test_blocks_Hermans_case_three_witnesses(self):
    """Check the blocks found for three witnesses given to the Scorer."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
    scorer = Scorer(collation)

    found = scorer._get_non_overlapping_repeating_blocks()

    expected_ranges = (
        "0-3, 17-20, 32-35",    # a b c d
        "5-7, 22-24, 37-39",    # g h i
        "10-14, 25-29, 40-44",  # ! q r s t
        "4, 21",                # F
    )
    for pattern in expected_ranges:
        self.assertIn(Block(RangeSet(pattern)), found)
def testFromListConstructor(self):
    """Check RangeSet.fromlist() with strings, RangeSets and a plain set."""

    def check(rangeset):
        # Every input flavour below describes the same six indexes.
        self.assertEqual(str(rangeset), "1,3,5-8")
        self.assertEqual(len(rangeset), 6)

    # list of range strings
    check(RangeSet.fromlist(["3", "5-8", "1"]))

    # list of RangeSet objects
    check(RangeSet.fromlist([RangeSet("3"), RangeSet("5-8"), RangeSet("1")]))

    # list holding one builtin set of integers
    check(RangeSet.fromlist([set([3, 5, 6, 7, 8, 1])]))
def test_simple(self):
    """Check RangeSetND construction from several kinds of input."""
    # Test constructors: (constructor argument, string form, length)
    constructor_cases = (
        (None, "", 0),
        ([["0-10"], ["40-60"]], "0-10,40-60\n", 32),
        ([["0-2", "1-2"], ["10", "3-5"]], "0-2; 1-2\n10; 3-5\n", 9),
        ([[0, 1], [0, 2], [2, 2], [2, 1], [1, 1], [1, 2],
          [10, 4], [10, 5], [10, 3]],
         "0-2; 1-2\n10; 3-5\n", 9),
        ([(0, 4), (0, 5), (1, 4), (1, 5)], "0-1; 4-5\n", 4),
    )
    for source, text, size in constructor_cases:
        self._testRS(source, text, size)

    # construct with copy_rangeset=False
    first_axis = RangeSet("0-10,30-40,50")
    second_axis = RangeSet("200-202")
    nd_set = RangeSetND([[first_axis, second_axis]], copy_rangeset=False)
    self.assertEqual(str(nd_set), "0-10,30-40,50; 200-202\n")
    # 23 elements on the first axis times 3 on the second one
    self.assertEqual(len(nd_set), 69)
def test_mutability_2(self):
    """Check that update() from a list does not keep the given RangeSets.

    A RangeSet handed to RangeSetND.update() inside a list is modified
    afterwards; the RangeSetND must still show the original content.
    """
    axis0_a = RangeSet("2-5")
    axis1_a = RangeSet("0-1")
    nd_set = RangeSetND([[axis0_a, axis1_a]])  # copy_rangeset left at its default
    self.assertEqual(str(nd_set), "2-5; 0-1\n")

    axis0_b = RangeSet("6-7")
    axis1_b = RangeSet("2-3")
    nd_set.update([[axis0_b, axis1_b]])
    self.assertEqual(str(nd_set), "2-5; 0-1\n6-7; 2-3\n")

    # Changing a source RangeSet afterwards must not show up in nd_set.
    axis1_b.add(4)
    self.assertEqual(str(axis1_b), "2-4")
    self.assertEqual(str(nd_set), "2-5; 0-1\n6-7; 2-3\n")
def testIntersectionLength(self):
    """Check the length of RangeSet operands and of their intersection."""
    # (left pattern, left length, right pattern, right length,
    #  intersection length)
    scenarios = (
        ("115-117,130,166-170,4780-4999", 229,
         "116-117,130,4781-4999", 222, 222),
        ("115-200", 86,
         "116-117,119,123-131,133,149,199", 15, 15),
        # StopIteration test
        ("115-117,130,166-170,4780-4999,5003", 230,
         "116-117,130,4781-4999", 222, 222),
        # StopIteration test2
        ("130,166-170,4780-4999", 226,
         "116-117", 2, 0),
    )
    for left_pat, left_len, right_pat, right_len, common_len in scenarios:
        left = RangeSet(left_pat)
        self.assertEqual(len(left), left_len)
        right = RangeSet(right_pat)
        self.assertEqual(len(right), right_len)
        common = left.intersection(right)
        self.assertEqual(len(common), common_len)
def testIsSuperSet(self):
    """Check RangeSet.issuperset() and the ``>=`` / ``>`` operators."""
    bigger = RangeSet("1-100,102,105-242,800")
    self.assertEqual(len(bigger), 240)
    smaller = RangeSet("3-98,140-199,800")
    self.assertEqual(len(smaller), 157)

    # Method form, including the reflexive case
    self.assertTrue(bigger.issuperset(bigger))
    self.assertTrue(bigger.issuperset(smaller))
    # Operator form
    self.assertTrue(bigger >= bigger)
    self.assertTrue(bigger > smaller)
    self.assertFalse(smaller > bigger)

    # 243 is not part of the bigger set: it is no longer a superset.
    outsider = RangeSet("3-98,140-199,243,800")
    self.assertEqual(len(outsider), 158)
    self.assertFalse(bigger.issuperset(outsider))
    self.assertFalse(bigger > outsider)
def testUpdate(self):
    """Check RangeSet.update() and RangeSet.union_update()."""
    base_pattern = "1-100,102,105-242,800"
    merged_text = "1-100,102,105-800,1924-1984"

    target = RangeSet(base_pattern)
    self.assertEqual(len(target), 240)
    extra = RangeSet("243-799,1924-1984")
    self.assertEqual(len(extra), 618)

    # update() works in place: same type, no padding, merged content
    target.update(extra)
    self.assertEqual(type(target), RangeSet)
    self.assertEqual(target.padding, None)
    self.assertEqual(len(target), 240 + 618)
    self.assertEqual(str(target), merged_text)

    # union_update() must give the same content from the same start
    target = RangeSet(base_pattern)
    target.union_update(extra)
    self.assertEqual(len(target), 240 + 618)
    self.assertEqual(str(target), merged_text)
def _extract_conf(self, cfg):
    """Extract cluster nodes configuration.

    Entries of ``cfg`` are sorted into a new dict:

    * the ``'default'`` entry is merged into ``conf['default']``;
    * an entry whose value is a dict is stored once per index of the
      RangeSet built from its key (an ``int`` key gives one index);
    * any other entry is stored as ``conf['default'][key]``.

    A key that cannot be parsed as a RangeSet is logged and skipped. The
    result is then passed through ``clustdock.format_dict`` together with
    the attributes of ``self``; when that raises ``KeyError`` the error is
    logged and the unformatted configuration is returned.

    :param cfg: mapping holding the configuration entries.
    :return: the extracted configuration dict.
    """
    conf = {"default": {}}
    # items() rather than iteritems(): the latter only exists on Python 2
    # dicts, while items() is available on both Python 2 and 3.
    for key, val in cfg.items():
        if key == 'default':
            conf['default'].update(val)
        elif isinstance(val, dict):
            if isinstance(key, int):
                rset = RangeSet.fromone(key)
            else:
                try:
                    rset = RangeSet(key)
                except RangeSetParseError as err:
                    _LOGGER.warning(
                        "Error in configuration file:"
                        " %s. Ingnoring this part", err)
                    continue
            # Every index of the range refers to the same ``val`` object.
            for idx in rset:
                conf[idx] = val
        else:
            conf['default'][key] = val
    try:
        conf = clustdock.format_dict(conf, **self.__dict__)
    except KeyError:
        # Best effort: keep the unformatted configuration.
        _LOGGER.exception("Key not found:")
    return conf
def testIsSubSet(self):
    """Check RangeSet.issubset() and the ordering operators."""
    bigger = RangeSet("1-100,102,105-242,800-900/2")
    smaller = RangeSet("3,800,802,804,888")

    # Method form, including the reflexive case
    self.assertTrue(smaller.issubset(smaller))
    self.assertTrue(smaller.issubset(bigger))
    # Operator form, in both directions
    self.assertTrue(smaller <= bigger)
    self.assertTrue(smaller < bigger)
    self.assertTrue(bigger > smaller)
    self.assertFalse(bigger < smaller)
    self.assertFalse(bigger <= smaller)
    self.assertFalse(smaller >= bigger)

    # since v1.6, padding is ignored when computing set operations
    unpadded = RangeSet("1-100")
    padded = RangeSet("001-100")
    self.assertTrue(unpadded.issubset(padded))
def _prepare_token_array(self):
    """Fill ``self.token_array`` and ``self.witness_ranges`` from the witnesses.

    For every witness, its tokens are tagged with the witness sigil and
    their position in the token array, then appended to the array followed
    by a ``$<index>`` marker token. The marker of the last witness is
    removed at the end.
    """
    # TODO: the lazy init should move to somewhere else
    # clear the suffix array and LCP array cache
    self.cached_suffix_array = None

    next_position = 0
    for witness_index, witness in enumerate(self.witnesses):
        # Range taken by this witness, starting at the running counter.
        range_start = self.counter
        range_stop = self.counter + len(witness.tokens())
        witness_range = RangeSet()
        witness_range.add_range(range_start, range_stop)

        # the extra one is for the marker token
        self.counter += len(witness.tokens()) + 1
        self.witness_ranges[witness.sigil] = witness_range

        # Tag every token with its witness and its array position.
        sigil = witness.sigil
        for token in witness.tokens():
            data = token.token_data
            data['_sigil'] = sigil
            data['_token_array_position'] = next_position
            next_position += 1
        self.token_array.extend(witness.tokens())

        # add marker token; it takes one position of its own
        marker_data = {"n": '$' + str(witness_index), '_sigil': sigil}
        self.token_array.append(Token(marker_data))
        next_position += 1

    # remove last marker
    self.token_array.pop()
def test_blocks_splitting_token_case(self):
    """Check that the ``a c b`` block is found when ``c`` occurs twice in W1."""
    collation = Collation()
    collation.add_plain_witness("W1", "a c b c")
    collation.add_plain_witness("W2", "a c b")
    scorer = Scorer(collation)

    found = scorer._get_non_overlapping_repeating_blocks()

    # a c b
    expected_block = Block(RangeSet("0-2, 5-7"))
    self.assertIn(expected_block, found)
def test_non_overlapping_blocks_black_cat(self):
    """Check that two identical witnesses give exactly one block."""
    collation = Collation()
    collation.add_plain_witness("W1", "the black cat")
    collation.add_plain_witness("W2", "the black cat")
    scorer = Scorer(collation)

    found = scorer._get_non_overlapping_repeating_blocks()

    expected = [Block(RangeSet("0-2, 4-6"))]
    self.assertEqual(expected, found)