Ejemplo n.º 1
0
 def calculate_non_overlapping_range_with(self, occupied):
     """Return the positions of this block that do not clash with ``occupied``.

     A candidate range set is built by calling
     ``add_range(start, start + self.minimum_block_length)`` for every
     start reported by ``self.block_occurrences()``.

     :param occupied: range set of positions that are already taken.
     :return: the full candidate range set when it does not intersect
         ``occupied``; otherwise a range set of the trimmed spans, or
         ``None`` when no span survives the trimming.
     :raises PartialOverlapException: when the first surviving span holds
         more than ``self.minimum_block_length`` positions.
     """
     # Turn every block occurrence into a range.
     candidate = RangeSet()
     for start in self.block_occurrences():
         candidate.add_range(start, start + self.minimum_block_length)

     # Nothing clashes with the occupied positions: keep everything.
     overlap = candidate.intersection(occupied)
     if not overlap:
         return candidate

     # Some positions clash: trim each candidate span so that it stops
     # where the first overlapping span at or after its start begins.
     free = RangeSet()
     for span in candidate.contiguous():
         span_start = span[0]
         first_hit = next(
             (hit for hit in overlap.contiguous() if hit[0] >= span_start),
             None)
         if first_hit is None:
             # No overlapping span at or after this one: nothing is added.
             continue
         hit_start = first_hit[0]
         if span_start != hit_start:
             free.add_range(span_start, hit_start)

     if not free:
         # Complete overlap, nothing left to return.
         return None

     # Sanity check: the first surviving span may not be longer than the
     # block length.
     head = next(free.contiguous())
     if head[-1] - head[0] + 1 > self.minimum_block_length:
         raise PartialOverlapException()
     return free
Ejemplo n.º 2
0
 def calculate_non_overlapping_range_with(self, occupied):
     """Return the positions of this block that are still free given ``occupied``.

     Every start from ``self.block_occurrences()`` contributes
     ``add_range(start, start + self.minimum_block_length)`` to a candidate
     range set.  When the candidates do not intersect ``occupied`` they are
     returned unchanged.  Otherwise each contiguous candidate span is cut
     off at the start of the first yielded intersecting span that begins at
     or after it; candidate spans without such an intersecting span are not
     added to the result.

     :param occupied: range set of positions that are already taken.
     :return: a ``RangeSet`` with the usable positions, or ``None`` when
         nothing usable is left.
     :raises PartialOverlapException: when the first usable span holds more
         than ``self.minimum_block_length`` positions.
     """
     # convert block occurrences into ranges
     potential_block_range = RangeSet()
     for occurrence in self.block_occurrences():
         potential_block_range.add_range(
             occurrence, occurrence + self.minimum_block_length)
     # check the intersection with the already occupied ranges
     block_intersection = potential_block_range.intersection(occupied)
     if not block_intersection:
         # no overlap, return complete block_range
         return potential_block_range
     # There is overlap with the occupied range; trim the candidates.
     real_block_range = RangeSet()
     for candidate in potential_block_range.contiguous():
         lower = candidate[0]
         # Find the first intersecting span starting at or after ``lower``.
         first_overlap = next(
             (x for x in block_intersection.contiguous() if x[0] >= lower),
             None)
         if first_overlap is None:
             continue
         upper = first_overlap[0]
         if lower != upper:
             real_block_range.add_range(lower, upper)
     if not real_block_range:
         # There is complete overlap, so return None
         return None
     # Check that the first slice is not larger than the block length.
     # The builtin next() is used instead of the generator's ``.next()``
     # method, which only exists on Python 2.
     first_range = next(real_block_range.contiguous())
     if first_range[-1] - first_range[0] + 1 > self.minimum_block_length:
         raise PartialOverlapException()
     return real_block_range
Ejemplo n.º 3
0
 def _prepare_token_array(self):
     """Fill ``self.token_array`` with all witness tokens, separated by markers.

     For every witness in ``self.witnesses`` this:

     * records its span in ``self.witness_ranges`` under the witness sigil
       and advances ``self.counter`` by the token count plus one (the
       marker slot);
     * stamps each token's ``token_data`` with ``'_sigil'`` and
       ``'_token_array_position'``;
     * appends the tokens followed by a marker token named ``'$<idx>'``.

     The marker appended after the last witness is removed again.  When
     there are no witnesses nothing is appended and nothing is removed.
     """
     # TODO: the lazy init should move to somewhere else
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     token_array_position = 0
     marker_appended = False
     for idx, witness in enumerate(self.witnesses):
         # Fetch the tokens once so the objects that get annotated below
         # are exactly the ones stored in the token array.
         tokens = witness.tokens()
         sigil = witness.sigil
         witness_range = RangeSet()
         witness_range.add_range(self.counter, self.counter + len(tokens))
         # the extra one is for the marker token
         self.counter += len(tokens) + 1
         self.witness_ranges[sigil] = witness_range
         for token in tokens:
             token.token_data['_sigil'] = sigil
             token.token_data['_token_array_position'] = token_array_position
             token_array_position += 1
         self.token_array.extend(tokens)
         # add marker token
         self.token_array.append(
             Token({
                 "n": '$' + str(idx),
                 '_sigil': sigil
             }))
         token_array_position += 1
         marker_appended = True
     if marker_appended:
         # Remove the last marker. Guarded so that an empty witness list
         # neither raises IndexError nor drops a pre-existing token.
         self.token_array.pop()
Ejemplo n.º 4
0
 def add_witness(self, witnessdata):
     """Register a new witness and append its content to ``self.combined_string``.

     Builds a ``Witness`` from ``witnessdata``, stores it in
     ``self.witnesses``, records its span in ``self.witness_ranges`` under
     the witness sigil and advances ``self.counter``.  If the combined
     string already has content, a ``" $<n> "`` separator is inserted
     before the new witness content, where ``<n>`` is the number of
     witnesses minus one.

     :param witnessdata: value handed to the ``Witness`` constructor.
     """
     # Invalidate the cached suffix array and LCP array.
     self.cached_suffix_array = None

     new_witness = Witness(witnessdata)
     self.witnesses.append(new_witness)

     token_count = len(new_witness.tokens())
     span = RangeSet()
     span.add_range(self.counter, self.counter + token_count)
     # Two extra slots for the separator: "$" and the number.
     self.counter += token_count + 2
     self.witness_ranges[new_witness.sigil] = span

     if self.combined_string != "":
         marker_number = len(self.witnesses) - 1
         self.combined_string += " $" + str(marker_number) + " "
     self.combined_string += new_witness.content
Ejemplo n.º 5
0
 def _prepare_token_array(self):
     """Append the tokens of every witness to ``self.token_array``.

     For each witness the covered span is stored in ``self.witness_ranges``
     under the witness sigil and ``self.counter`` moves forward by the
     token count plus one.  Whenever the token array already holds
     something, a marker token named ``"$<idx - 1>"`` is appended before
     the witness tokens.
     """
     # TODO: the lazy init should move to somewhere else
     # Invalidate the cached suffix array and LCP array.
     self.cached_suffix_array = None
     for position, wit in enumerate(self.witnesses):
         span_start = self.counter
         span = RangeSet()
         span.add_range(span_start, span_start + len(wit.tokens()))
         # One extra slot is reserved for the marker token.
         self.counter += len(wit.tokens()) + 1
         self.witness_ranges[wit.sigil] = span
         if self.token_array:
             # Separate this witness from what is already in the array.
             separator = Token({"n": "$" + str(position - 1)})
             self.token_array.append(separator)
         # Note: the tokens are requested from the witness again here.
         self.token_array.extend(wit.tokens())
Ejemplo n.º 6
0
 def _prepare_token_array(self):
     """Build ``self.token_array`` from the tokens of all witnesses.

     Each witness gets an entry in ``self.witness_ranges`` (keyed by its
     sigil) describing the span it occupies, and ``self.counter`` is
     increased by the witness's token count plus one.  A marker token
     ``"$<idx - 1>"`` is put in front of a witness's tokens whenever the
     token array is not empty at that point.
     """
     # TODO: the lazy init should move to somewhere else
     # Drop the cached suffix array and LCP array.
     self.cached_suffix_array = None
     for witness_no, current in enumerate(self.witnesses):
         first = self.counter
         past_last = self.counter + len(current.tokens())
         occupied_span = RangeSet()
         occupied_span.add_range(first, past_last)
         # Reserve one more position for the marker token.
         self.counter += len(current.tokens()) + 1
         self.witness_ranges[current.sigil] = occupied_span
         if self.token_array:
             marker_name = "$" + str(witness_no - 1)
             self.token_array.append(Token({"n": marker_name}))
         # The witness is asked for its tokens once more here.
         self.token_array.extend(current.tokens())
Ejemplo n.º 7
0
 def add_witness(self, witnessdata):
     """Register a new witness and append its token strings to ``self.combined_tokens``.

     Builds a ``Witness`` from ``witnessdata``, stores it in
     ``self.witnesses``, records its span in ``self.witness_ranges`` under
     the witness sigil and advances ``self.counter``.  From the second
     witness on, the two separator entries ``'$'`` and the witness number
     (count minus one, as a string) precede the witness's token strings.

     :param witnessdata: value handed to the ``Witness`` constructor.
     """
     # Invalidate the cached suffix array and LCP array.
     self.cached_suffix_array = None

     added = Witness(witnessdata)
     self.witnesses.append(added)

     begin = self.counter
     size = len(added.tokens())
     covered = RangeSet()
     covered.add_range(begin, begin + size)
     # Two extra slots for the separator entries: "$" and the number.
     self.counter += size + 2
     self.witness_ranges[added.sigil] = covered

     if len(self.witnesses) > 1:
         self.combined_tokens.extend(['$', str(len(self.witnesses) - 1)])
     self.combined_tokens.extend(tk.token_string for tk in added.tokens())
Ejemplo n.º 8
0
 def _prepare_token_array(self):
     """Fill ``self.token_array`` with all witness tokens, separated by markers.

     Per witness in ``self.witnesses``: its span is stored in
     ``self.witness_ranges`` under the witness sigil, ``self.counter`` is
     advanced by the token count plus one, every token's ``token_data``
     receives ``'_sigil'`` and ``'_token_array_position'``, and the tokens
     plus a trailing marker token named ``'$<idx>'`` are appended to the
     token array.  The marker after the final witness is removed again;
     with no witnesses the token array is left untouched.
     """
     # TODO: the lazy init should move to somewhere else
     # clear the suffix array and LCP array cache
     self.cached_suffix_array = None
     token_array_position = 0
     trailing_marker = False
     for idx, witness in enumerate(self.witnesses):
         # Ask for the tokens a single time: the annotated objects and the
         # stored objects are then guaranteed to be the same.
         witness_tokens = witness.tokens()
         token_count = len(witness_tokens)
         sigil = witness.sigil
         witness_range = RangeSet()
         witness_range.add_range(self.counter, self.counter + token_count)
         # the extra one is for the marker token
         self.counter += token_count + 1
         self.witness_ranges[sigil] = witness_range
         for token in witness_tokens:
             token.token_data['_sigil'] = sigil
             token.token_data['_token_array_position'] = token_array_position
             token_array_position += 1
         self.token_array.extend(witness_tokens)
         # add marker token
         self.token_array.append(Token({"n": '$' + str(idx), '_sigil': sigil}))
         token_array_position += 1
         trailing_marker = True
     if trailing_marker:
         # Remove the last marker. Without this guard an empty witness
         # list would raise IndexError or pop an unrelated token.
         self.token_array.pop()
    def _get_non_overlapping_repeating_blocks(self):
        """Pick blocks from the LCP intervals so that selected blocks never overlap.

        Intervals are drained from a priority queue.  An interval whose
        range does not touch the occupied positions is taken as is; one
        that is fully covered is skipped; a partially covered one is
        reduced to its free parts that begin at an occurrence start and,
        if at least two such parts remain, cut down to a common length.

        :return: list of ``Block`` objects in the order they were selected.
        """
        # The LCP intervals that are calculated from the extended suffix array are all potential blocks.
        # However some potential blocks overlap. To decide the definitive blocks the intervals go through
        # a priority queue (original note: ordered on 1) number of witnesses 2) block length).
        potential_blocks = self.token_index.split_lcp_array_into_intervals()
        pending = PriorityQueue()
        for lcp_interval in potential_blocks:
            pending.put(lcp_interval)

        occupied = RangeSet()
        real_blocks = []

        while not pending.empty():
            candidate = pending.get()
            candidate_range = candidate._as_range()
            overlap = candidate_range.intersection(occupied)

            if not overlap:
                # Completely free: select the whole candidate.
                occupied.union_update(candidate_range)
                real_blocks.append(Block(candidate_range))
                continue

            if overlap == candidate_range:
                # Completely covered already: skip.
                continue

            # Partial overlap. Keep only the free parts that begin at an
            # occurrence start (this filters out left partial overlaps).
            free_parts = candidate_range.difference(overlap)
            starts = candidate.block_occurrences()
            usable_parts = [
                part for part in free_parts.contiguous() if part[0] in starts
            ]
            if len(usable_parts) < 2:
                continue
            usable = RangeSet()
            for part in usable_parts:
                usable.add_range(part[0], part[-1] + 1)

            # Right partial overlap: every occurrence is limited to the
            # shortest remaining part (never longer than the interval).
            shortest = min(
                [candidate.length] +
                [len(part) for part in usable.contiguous()])

            trimmed = RangeSet()
            for part in usable.contiguous():
                trimmed.add_range(part[0], part[0] + shortest)

            occupied.union_update(trimmed)
            real_blocks.append(Block(trimmed))

        return real_blocks
Ejemplo n.º 10
0
    def _get_non_overlapping_repeating_blocks(self):
        """Turn the potential blocks (LCP intervals) into non-overlapping blocks.

        Each interval taken from the priority queue is either selected
        whole (no clash with occupied positions), dropped (entirely
        occupied), or selected in a reduced form: only the free parts that
        begin at an occurrence start are kept, at least two of them are
        required, and all are shortened to the same length.

        :return: list of the ``Block`` objects that were selected.
        """
        # The LCP intervals that are calculated from the extended suffix array are all potential blocks.
        # However some potential blocks overlap, so they are processed through a priority queue
        # (original note: based on 1) number of witnesses 2) block length).
        potential_blocks = self.token_index.split_lcp_array_into_intervals()
        queue = PriorityQueue()
        for entry in potential_blocks:
            queue.put(entry)

        occupied = RangeSet()
        real_blocks = []

        while not queue.empty():
            interval = queue.get()
            whole = interval._as_range()
            clash = whole.intersection(occupied)

            if clash:
                if clash == whole:
                    # complete overlap; skip
                    continue

                # partial overlap: work with what is still free
                remainder = whole.difference(clash)
                occurrence_starts = interval.block_occurrences()

                # left partial overlap: drop pieces that do not begin at
                # an occurrence start
                kept = RangeSet()
                kept_count = 0
                for piece in remainder.contiguous():
                    if piece[0] not in occurrence_starts:
                        continue
                    kept.add_range(piece[0], piece[-1] + 1)
                    kept_count += 1
                if kept_count < 2:
                    continue

                # right partial overlap: find the smallest allowed length
                limit = interval.length
                for piece in kept.contiguous():
                    limit = min(limit, len(piece))

                selection = RangeSet()
                for piece in kept.contiguous():
                    selection.add_range(piece[0], piece[0] + limit)
            else:
                # no clash at all: take the interval unchanged
                selection = whole

            occupied.union_update(selection)
            real_blocks.append(Block(selection))

        return real_blocks
Ejemplo n.º 11
0
 def testAddRange(self):
     """test RangeSet.add_range()

     Covers plain ranges, stepped ranges with and without autostep,
     ranges adjacent to existing bounds, a single-element range and the
     rejection of an empty range.
     """

     def check(rset, size, text):
         # Length first, then string form.
         self.assertEqual(len(rset), size)
         self.assertEqual(str(rset), text)

     r1 = RangeSet()
     r1.add_range(1, 100, 1)
     check(r1, 99, "1-99")
     r1.add_range(40, 101, 1)
     check(r1, 100, "1-100")
     r1.add_range(399, 423, 2)
     check(r1, 112,
           "1-100,399,401,403,405,407,409,411,413,415,417,419,421")

     # With autostep...
     r1 = RangeSet(autostep=3)
     r1.add_range(1, 100, 1)
     self.assertEqual(r1.autostep, 3)
     check(r1, 99, "1-99")
     r1.add_range(40, 101, 1)
     check(r1, 100, "1-100")
     r1.add_range(399, 423, 2)
     check(r1, 112, "1-100,399-421/2")

     # Bound checks
     r1 = RangeSet("1-30", autostep=2)
     check(r1, 30, "1-30")
     self.assertEqual(r1.autostep, 2)
     r1.add_range(32, 35, 1)
     check(r1, 33, "1-30,32-34")
     r1.add_range(31, 32, 1)
     check(r1, 34, "1-34")

     r1 = RangeSet("1-30/4")
     check(r1, 8, "1,5,9,13,17,21,25,29")
     r1.add_range(30, 32, 1)
     check(r1, 10, "1,5,9,13,17,21,25,29-31")
     r1.add_range(40, 65, 10)
     check(r1, 13, "1,5,9,13,17,21,25,29-31,40,50,60")

     r1 = RangeSet("1-30", autostep=3)
     r1.add_range(40, 65, 10)
     self.assertEqual(r1.autostep, 3)
     check(r1, 33, "1-29,30-60/10")

     # One
     r1.add_range(103, 104)
     check(r1, 34, "1-29,30-60/10,103")

     # Zero: an empty range must be rejected
     self.assertRaises(AssertionError, r1.add_range, 103, 103)
Ejemplo n.º 12
0
 def _as_range(self):
     """Return a ``RangeSet`` built from all occurrences of this interval.

     Every start from ``self.block_occurrences()`` contributes
     ``add_range(start, start + self.minimum_block_length)``.
     """
     covered = RangeSet()
     for start in self.block_occurrences():
         covered.add_range(start, start + self.minimum_block_length)
     return covered
Ejemplo n.º 13
0
    def select_nodes(self, profil, name, nb_nodes, host):
        '''Select nodes to spawn

        Validates the request, picks ``nb_nodes``-based index ranges whose
        node names (``name`` + index) are not already present in the list
        returned by ``self.list_nodes(byhost=False)``, and adds one node
        per selected index to a new virtual cluster.

        On a validation failure (no host, invalid cluster name, unknown
        profile) the error is logged, sent on ``self.rep_sock`` as a packed
        ``('', [message])`` tuple, and an empty list is returned.

        :param profil: profile name; must be a key of ``self.profiles``.
        :param name: cluster name, also used as the node name prefix.
        :param nb_nodes: number of nodes requested.
        :param host: host passed to ``cluster.add_node``; ``None`` is an error.
        :return: list of the nodes returned by ``cluster.add_node``.
        '''
        # Validation: the first failing check decides the error message.
        if host is None:
            failure = "Error: No host available\n"
        elif not vc.VirtualCluster.valid_clustername(name):
            failure = "Error: clustername '{}' is not a valid name\n".format(name)
        elif profil not in self.profiles:
            failure = "Error: Profil '{}' not found in configuration file\n".format(
                profil)
        else:
            failure = None
        if failure is not None:
            _LOGGER.error(failure)
            self.rep_sock.send(msgpack.packb(('', [failure])))
            return []

        # 1: recover the names of the nodes that already exist
        known_nodes = self.list_nodes(byhost=False)
        existing = NodeSet.fromlist([node.name for node in known_nodes])

        # 2: start from indexes 0..nb_nodes-1 and replace every index that
        #    is already taken by fresh ones beyond the current maximum
        first_idx = 0
        last_idx = nb_nodes - 1
        base_range = RangeSet("%d-%d" % (first_idx, last_idx))
        base_nodeset = NodeSetBase(name + '%s', base_range)
        clashes = existing.intersection(base_nodeset)
        while len(clashes) > 0:
            taken = [
                clustdock.VirtualNode.split_name(node)[1] for node in clashes
            ]
            for idx in taken:
                _LOGGER.debug("Removing %d from rangeset %s", idx, base_range)
                base_range.remove(idx)
            base_nodeset.difference_update(clashes)
            _LOGGER.debug("Nodeset becomes '%s' after removing", base_nodeset)
            first_idx = max(taken + list(base_range)) + 1
            last_idx = first_idx + max(len(taken), nb_nodes - len(base_range))
            base_range.add_range(first_idx, last_idx)
            _LOGGER.debug("New rangeset: %s", base_range)
            extension = NodeSetBase(
                name + '%s', RangeSet.fromlist([range(first_idx, last_idx)]))
            base_nodeset.update(extension)
            _LOGGER.debug("New nodeset: %s", base_nodeset)
            clashes = existing.intersection(base_nodeset)

        _LOGGER.debug("final rangeset/nodeset: %s / %s", base_range,
                      base_nodeset)

        # 3: create the cluster and one node per selected index
        cluster = vc.VirtualCluster(name, profil, self.profiles[profil])
        return [cluster.add_node(idx, host) for idx in base_range]
Ejemplo n.º 14
0
 def testAddRange(self):
     """test RangeSet.add_range()

     Each step adds a range and then verifies the resulting length and
     string form of the set.
     """

     def run(rset, steps):
         # steps: (add_range arguments, expected len, expected str)
         for args, size, text in steps:
             rset.add_range(*args)
             self.assertEqual(len(rset), size)
             self.assertEqual(str(rset), text)

     r1 = RangeSet()
     run(r1, [
         ((1, 100, 1), 99, "1-99"),
         ((40, 101, 1), 100, "1-100"),
         ((399, 423, 2), 112,
          "1-100,399,401,403,405,407,409,411,413,415,417,419,421"),
     ])

     # With autostep...
     r1 = RangeSet(autostep=3)
     r1.add_range(1, 100, 1)
     self.assertEqual(r1.autostep, 3)
     self.assertEqual(len(r1), 99)
     self.assertEqual(str(r1), "1-99")
     run(r1, [
         ((40, 101, 1), 100, "1-100"),
         ((399, 423, 2), 112, "1-100,399-421/2"),
     ])

     # Bound checks
     r1 = RangeSet("1-30", autostep=2)
     self.assertEqual(len(r1), 30)
     self.assertEqual(str(r1), "1-30")
     self.assertEqual(r1.autostep, 2)
     run(r1, [
         ((32, 35, 1), 33, "1-30,32-34"),
         ((31, 32, 1), 34, "1-34"),
     ])

     r1 = RangeSet("1-30/4")
     self.assertEqual(len(r1), 8)
     self.assertEqual(str(r1), "1,5,9,13,17,21,25,29")
     run(r1, [
         ((30, 32, 1), 10, "1,5,9,13,17,21,25,29-31"),
         ((40, 65, 10), 13, "1,5,9,13,17,21,25,29-31,40,50,60"),
     ])

     r1 = RangeSet("1-30", autostep=3)
     r1.add_range(40, 65, 10)
     self.assertEqual(r1.autostep, 3)
     self.assertEqual(len(r1), 33)
     self.assertEqual(str(r1), "1-29,30-60/10")

     # One
     run(r1, [((103, 104), 34, "1-29,30-60/10,103")])

     # Zero: an empty range must be rejected
     with self.assertRaises(AssertionError):
         r1.add_range(103, 103)
Ejemplo n.º 15
0
 def _as_range(self):
     """Convert this interval into a ``RangeSet``.

     For each start position from ``self.block_occurrences()`` the range
     from that start up to start plus ``self.minimum_block_length`` is
     added via ``add_range``.
     """
     interval_range = RangeSet()
     for begin in self.block_occurrences():
         end = begin + self.minimum_block_length
         interval_range.add_range(begin, end)
     return interval_range