Example #1
0
    def buildPages(self):
        """Group the text fragments in ``self.offset`` into pages.

        Page boundaries ("dividers") sit at every multiple of 280 below
        ``self.n_pages * 280``.  The dividers are merged with the keys of
        ``self.offset`` into one sorted index list, that list is cut at the
        dividers, and the fragments of each page are joined with single
        spaces.

        The result is stored in ``self.pagination``, a ``SortedDict`` mapping
        the page number (starting at 0) to the page text.
        """
        dividers = [n * 280 for n in range(1, self.n_pages)]
        indices = SortedList(set(dividers).union(self.offset.keys()))
        last_page = self.n_pages - 1

        pages = []
        for i in range(self.n_pages):
            if not dividers:
                # Only one page: there is no divider to cut at.
                page = list(indices)
            elif i == 0:
                # First page.  Clamp the bound so that a divider sitting at
                # position 0 does not turn into a wrap-around ``-1`` slice.
                stop = max(indices.index(dividers[0]) - 1, 0)
                page = indices[0:stop]
            elif i == last_page:
                # Last page starts one entry before its divider.
                start = max(indices.index(dividers[i - 1]) - 1, 0)
                page = indices[start:]
            else:
                page = indices[indices.index(dividers[i - 1]):
                               indices.index(dividers[i])]
            # Dividers are only cut marks; keep real offsets only.  Building
            # a new list avoids removing items from a list while iterating it.
            pages.append([key for key in page if key in self.offset])

        self.pagination = SortedDict()
        for i, page in enumerate(pages):
            self.pagination[i] = ' '.join(self.offset[key] for key in page)
Example #2
0
def get_table_structure(cells):
    """Arrange cells into a grid of rows and columns.

    Every distinct top/bottom ``y`` of a cell becomes a row boundary and every
    distinct left/right ``x`` becomes a column boundary.  Each cell is then
    placed in the row of its top edge, with its row/column span measured in
    boundaries crossed.

    :param cells: iterable of objects exposing ``top_left`` and
        ``bottom_right`` points with ``x`` and ``y`` attributes
    :return: ``TableStructure`` built from the rows (each sorted by
        ``column_no``), the row count and the column count
    """
    y_edges = set()
    x_edges = set()
    for cell in cells:
        y_edges.update((cell.top_left.y, cell.bottom_right.y))
        x_edges.update((cell.top_left.x, cell.bottom_right.x))

    row_lines = SortedList(y_edges)
    col_lines = SortedList(x_edges)
    n_rows = len(row_lines) - 1
    n_cols = len(col_lines) - 1

    rows = [[] for _ in range(n_rows)]
    for cell in cells:
        top = row_lines.index(cell.top_left.y)
        left = col_lines.index(cell.top_left.x)
        height = row_lines.index(cell.bottom_right.y) - top
        width = col_lines.index(cell.bottom_right.x) - left
        rows[top].append(TableCell(cell, top, left, height, width))

    # Order the cells of every row from left to right.
    for row in rows:
        row.sort(key=lambda table_cell: table_cell.column_no)

    return TableStructure(rows, n_rows, n_cols)
Example #3
0
def coincident_indices(list0, list1, delta):
    """Pair each time of ``list0`` with its closest time in ``list1``.

    Both inputs are sorted first.  For every time ``t0`` the candidates are
    the times of ``list1`` inside ``[t0 - delta, t0 + delta]``; the candidate
    with the smallest absolute difference wins (the first one on ties).

    Parameters
    ----------
    list0 : list
        List of times
    list1 : list
        List of times (preferably longer than ``list0``)
    delta : float
        Half-width of the time window around each entry of ``list0``

    Returns
    -------
    coincidents : dict
        ``{index0: index1}``.  Both indices are positions in the *sorted*
        copies of the inputs; for repeated values the position of the first
        occurrence is used.  Times without any candidate are left out.

    """
    sorted0 = SortedList(list0)
    sorted1 = SortedList(list1)

    coincidents = {}
    for t0 in sorted0:
        candidates = list(sorted1.irange(t0 - delta, t0 + delta))
        if not candidates:
            continue
        gaps = [abs(t0 - t1) for t1 in candidates]
        nearest = candidates[np.argmin(gaps)]
        coincidents[sorted0.index(t0)] = sorted1.index(nearest)

    return coincidents
Example #4
0
    def test_pagination(self):
        """Replay the pagination algorithm step by step on fixture ``t3``.

        Checks the dividers, the merged index list, the content of the first
        page and finally that every generated page is shorter than 280
        characters.
        """
        print('test3')
        t = t3
        dividers = [n * 280 for n in range(1, t.n_pages)]
        self.assertEqual(len(dividers), 1)
        self.assertEqual(dividers[0], 280)

        indices = SortedList(set(dividers + [key for key in t.offset.keys()]))
        self.assertEqual(
            indices,
            SortedList([
                0, 4, 8, 17, 20, 26, 34, 44, 56, 65, 70, 79, 84, 88, 93, 102,
                109, 124, 137, 142, 146, 155, 165, 171, 175, 179, 188, 192,
                203, 213, 226, 230, 235, 239, 249, 252, 256, 262, 272, 280
            ]))

        pages = SortedDict()
        for i in range(0, t.n_pages):
            # Compare integers by value; ``is`` only happens to work for
            # small cached ints and emits a SyntaxWarning on literals.
            if i == 0:
                # first page
                page = indices[0:indices.index(dividers[0]) - 1]
                self.assertEqual(page, [
                    0, 4, 8, 17, 20, 26, 34, 44, 56, 65, 70, 79, 84, 88, 93,
                    102, 109, 124, 137, 142, 146, 155, 165, 171, 175, 179, 188,
                    192, 203, 213, 226, 230, 235, 239, 249, 252, 256, 262
                ])
            elif i == t.n_pages - 1:
                # last page
                page = indices[indices.index(dividers[i - 1]) - 1:]
            else:
                page = indices[indices.index(dividers[i - 1]):indices.
                               index(dividers[i])]
            pages[i] = page

        # Drop the dividers that are not real offsets.  A filtered copy is
        # built because removing from a list while iterating it skips items.
        for key in list(pages):
            pages[key] = [item for item in pages[key] if item in t.offset]
        self.assertNotEqual(len(pages), 0)

        t.pagination = SortedDict()
        for i in range(t.n_pages):
            t.pagination[i] = ' '.join(t.offset[key] for key in pages[i])

        self.assertIn('pagination', t.__dict__)
        for i in range(t.n_pages):
            self.assertLess(len(t.pagination[i]), 280)
Example #5
0
def _check_monotonicity(slist: SortedList, sort_dir: str, x: Union[float, int],
                        y: float) -> Tuple[SortedList, str]:
    """Insert ``(x, y)`` into ``slist`` and re-evaluate the sort direction.

    Once the list holds at least three points, the three-point window around
    the new point is classified with ``_non_increasing`` and
    ``_non_decreasing``: ``'unknown'`` when both hold, ``'up'`` when only
    non-decreasing holds, ``'down'`` when only non-increasing holds.  If
    neither holds, or the result contradicts a known ``sort_dir``, a message
    is printed and the debugger is entered.

    :param slist: sorted list of ``(x, y)`` points, modified in place
    :param sort_dir: direction known so far
    :param x: first component of the new point
    :param y: second component of the new point
    :return: the same ``slist`` and the (possibly updated) direction
    """
    point = (x, y)
    slist.add(point)
    position = slist.index(point)
    size = len(slist)

    if size < 3:
        return slist, sort_dir

    # Three neighbouring points containing the new one, clamped at both ends
    # of the list.
    first = min(max(position - 1, 0), size - 3)
    window = slist[first:first + 3]

    falling = _non_increasing(window)
    rising = _non_decreasing(window)
    if rising and falling:
        updated_sort_dir = 'unknown'
    elif rising:
        updated_sort_dir = 'up'
    elif falling:
        updated_sort_dir = 'down'
    else:
        # ``updated_sort_dir`` is deliberately left unset on this path.
        print('Binary iterator observed non-monotonic values. Entering debugging mode:')
        breakpoint()

    # noinspection PyUnboundLocalVariable
    if sort_dir != 'unknown' and sort_dir != updated_sort_dir:
        print('Binary iterator observed non-monotonic values. Entering debugging mode:')
        breakpoint()

    return slist, updated_sort_dir
Example #6
0
    def containsNearbyAlmostDuplicate(self, nums: List[int], k: int,
                                      t: int) -> bool:
        """Tell whether two nearby values are almost equal.

        Returns True when there are distinct indices ``i`` and ``j`` with
        ``abs(i - j) <= k`` and ``abs(nums[i] - nums[j]) <= t``.

        A sorted window of the last ``k`` values is maintained; for every new
        value only its sorted predecessor and successor need to be compared.

        :param nums: values to scan
        :param k: maximum index distance
        :param t: maximum value distance
        :return: True if such a pair exists, otherwise False
        """
        # No pair can satisfy a negative value distance or a non-positive
        # index distance.
        if t < 0 or k <= 0:
            return False

        window = SortedList()
        for i, value in enumerate(nums):
            if i > k:
                # nums[i - k - 1] has just left the window.
                window.remove(nums[i - k - 1])

            pos = window.bisect_left(value)
            # Closest smaller value.
            if pos > 0 and value - window[pos - 1] <= t:
                return True
            # Closest value that is greater than or equal to ``value``.
            if pos < len(window) and window[pos] - value <= t:
                return True

            window.add(value)

        return False
def streamUpdate(stream, target):
    """Insert stream values into a sorted list, dropping tight triples.

    After each insertion the three windows of three consecutive sorted values
    that contain the new value are tried in order: ``(index - 2, index - 1,
    index)``, ``(index - 1, index, index + 1)`` and ``(index, index + 1,
    index + 2)``.  The first window whose two adjacent gaps are both
    ``<= target`` is removed from the list.  The list is printed after every
    step.

    :param stream: iterable of numbers
    :param target: largest allowed gap between neighbours of a triple
    """
    sl = SortedList()

    for num in stream:
        sl.add(num)
        index = sl.index(num)

        # Every window is tested on its own.  An if/elif chain would skip
        # the later windows as soon as the first one merely *exists*.
        for start in (index - 2, index - 1, index):
            if start < 0 or start + 2 >= len(sl):
                continue
            low, mid, high = sl[start:start + 3]
            if mid - low <= target and high - mid <= target:
                for value in (low, mid, high):
                    sl.remove(value)
                break

        print(sl)
Example #8
0
class ColorTransferFunction(Data):
    """Sorted collection of ``ColorTransferFunctionPoint`` objects.

    Points are stored in ``self.points`` (a ``SortedList``) and the
    ``point_added`` signal is emitted for every point that is added.
    """
    point_added = Signal(ColorTransferFunctionPoint)

    def __init__(self, path: Path = None):
        super().__init__(path)

        self.points = SortedList()

    @classmethod
    def from_x_fractions_colors_array(
            cls,
            x_fractions_colors_array: np.ndarray,
            max_x: int = 255) -> ColorTransferFunction:
        """Build a transfer function from rows of ``[x_fraction, *color]``.

        :param x_fractions_colors_array: one row per point; column 0 is the
            fraction of ``max_x``, the remaining columns are the color
        :param max_x: value the fractions are multiplied by
        :return: new instance of ``cls`` containing one point per row
        """
        color_transfer_function = cls()
        for row in x_fractions_colors_array:
            x_fraction = row[0]
            color_array = row[1:]
            color_transfer_function.add_point_from_x_color(
                x_fraction * max_x, color_array)
        return color_transfer_function

    @classmethod
    def default_jet(cls, max_x: int = 255) -> ColorTransferFunction:
        """Return the built-in five-point color ramp scaled to ``max_x``."""
        # Go through ``cls`` so that subclasses get an instance of their own
        # type, as with ``from_x_fractions_colors_array``.
        return cls.from_x_fractions_colors_array(
            np.array([[0, 0, 0, 255, 255], [0.25, 0, 255, 255, 255],
                      [0.5, 0, 255, 0, 255], [0.75, 255, 255, 0, 255],
                      [1, 255, 0, 0, 255]]), max_x)

    def add_point(self, point: ColorTransferFunctionPoint):
        """Insert ``point`` in sorted position and emit ``point_added``."""
        self.points.add(point)
        self.point_added.emit(point)

    def add_point_from_x_color(self,
                               x: float,
                               color_array: np.ndarray = None):
        """Create a point from ``x`` and ``color_array`` and add it.

        :param x: position of the point
        :param color_array: color of the point; when omitted, a fresh
            four-element array filled with 255 is used
        """
        if color_array is None:
            # Created per call: an ndarray default would be one shared
            # object, so editing one point's color would change the others.
            color_array = np.full((4, ), 255)
        self.add_point(ColorTransferFunctionPoint(x, color_array))

    def point_before(
            self,
            point: ColorTransferFunctionPoint) -> ColorTransferFunctionPoint:
        """Return the point stored just before ``point``.

        NOTE(review): for the first point the index becomes ``-1`` and the
        *last* point is returned - confirm callers never ask for that.
        """
        return self.points[self.points.index(point) - 1]

    def point_after(
            self,
            point: ColorTransferFunctionPoint) -> ColorTransferFunctionPoint:
        """Return the point stored just after ``point``.

        Raises ``IndexError`` when ``point`` is the last one.
        """
        return self.points[self.points.index(point) + 1]
Example #9
0
    def containsNearbyAlmostDuplicate(self, nums: List[int], k: int,
                                      t: int) -> bool:
        """Report whether two values within ``k`` indices differ by ``<= t``.

        A sorted window of the most recent values is kept; each new value is
        compared with its direct neighbours in that window.
        """
        from sortedcontainers import SortedList
        recent = SortedList()

        for position, current in enumerate(nums):
            if position > k:
                # The value k + 1 steps back is no longer in range.
                recent.remove(nums[position - k - 1])

            recent.add(current)
            slot = recent.bisect_left(current)

            has_lower = slot >= 1
            if has_lower and abs(recent[slot - 1] - current) <= t:
                return True

            has_upper = slot + 1 < len(recent)
            if has_upper and abs(recent[slot + 1] - current) <= t:
                return True

        return False
Example #10
0
 def kEmptySlots(self, bulbs: List[int], k: int) -> int:
     """Return the first day on which two bulbs enclose exactly ``k`` slots.

     Bulb positions are inserted one per day (days start at 1) into a sorted
     list guarded by ``-inf`` and ``+inf``, so every position always has a
     neighbour on both sides.  Returns -1 if no such day exists.
     """
     wanted_gap = k + 1
     lit = SortedList([float('-inf'), float('inf')])
     day = 0
     for position in bulbs:
         day += 1
         lit.add(position)
         at = lit.index(position)
         gap_below = lit[at] - lit[at - 1]
         gap_above = lit[at + 1] - lit[at]
         if gap_below == wanted_gap or gap_above == wanted_gap:
             return day
     return -1
class Solution:
    """Running "MK average" over the last ``m`` added numbers.

    The window is kept twice: in arrival order (``queue``) and in sorted
    order (``sl``).  ``total`` is the sum of the window, ``left_k`` the sum
    of its ``k`` smallest values and ``right_k`` the sum of its ``k``
    largest values.
    """

    def __init__(self, m: int, k: int):
        from sortedcontainers import SortedList
        self.m, self.k = m, k
        self.queue = deque()
        self.sl = SortedList()
        self.total = self.left_k = self.right_k = 0

    def addElement(self, num: int) -> None:
        """Append ``num`` to the window, evicting the oldest value if full."""
        if len(self.sl) < self.m:
            # Warm-up: just collect values.  The edge sums are computed once,
            # at the moment the window becomes full.
            self.sl.add(num)
            self.queue.append(num)
            self.total += num
            if len(self.sl) == self.m:
                self.left_k = sum(self.sl[:self.k])
                self.right_k = sum(self.sl[-self.k:])
            return

        # Window is full: account for the new value before inserting it.
        index = self.sl.bisect_left(num)
        if index < self.k:
            # ``num`` enters the k smallest and pushes the current k-th out.
            self.left_k += num
            self.left_k -= self.sl[self.k - 1]

        if index > len(self.sl) - self.k:
            # ``num`` enters the k largest and pushes their smallest out.
            self.right_k += num
            self.right_k -= self.sl[len(self.sl) - self.k]

        self.sl.add(num)
        self.total += num
        self.queue.append(num)

        # The list now holds m + 1 values: evict the oldest one.
        oldest = self.queue.popleft()
        index = self.sl.index(oldest)

        if index < self.k:
            self.left_k -= oldest
            self.left_k += self.sl[self.k]

        if index > self.m - self.k:
            self.right_k -= oldest
            self.right_k += self.sl[self.m - self.k]

        self.total -= oldest
        self.sl.remove(oldest)

    def calculateMKAverage(self) -> int:
        """Return the floored mean of the window without its ``k`` smallest
        and ``k`` largest values, or -1 while fewer than ``m`` values exist.
        """
        if len(self.sl) < self.m:
            return -1
        return (self.total - self.left_k - self.right_k) // (self.m -
                                                             2 * self.k)
Example #12
0
def test_index():
    """``index`` finds the first match and honours start/stop bounds."""
    size = 100

    ascending = SortedList(range(size), load=17)
    for expected in range(size):
        assert ascending.index(expected) == expected

    # A stop beyond the end is accepted.
    assert ascending.index(99, 0, 1000) == 99

    zeros = SortedList([0] * size, load=17)

    # With all values equal, the first match is always ``start`` itself.
    for start in range(size):
        for stop in range(start + 1, size + 1):
            assert zeros.index(0, start, stop) == start

    # Negative starts count from the end.
    for start in range(size):
        assert zeros.index(0, start - size) == start

    # A start before the beginning is accepted.
    assert zeros.index(0, -1000) == 0
def test_index():
    """Exercise ``SortedList.index`` with and without search bounds."""
    count = 100
    distinct = SortedList(range(count), load=17)

    positions = [distinct.index(value) for value in range(count)]
    for value, position in enumerate(positions):
        assert value == position

    assert distinct.index(99, 0, 1000) == 99

    same = SortedList((0 for _ in range(count)), load=17)

    for lower in range(count):
        upper = lower + 1
        while upper <= count:
            assert same.index(0, lower, upper) == lower
            upper += 1

    for lower in range(count):
        negative_start = -(count - lower)
        assert same.index(0, negative_start) == lower

    assert same.index(0, -1000) == 0
Example #14
0
class ZSet:
    """Set of members ordered by score.

    ``mem2score`` maps each member to its score; ``scores`` is a sorted list
    of ``(score, member)`` tuples and provides the ordering.
    """

    def __init__(self):
        self.mem2score = {}
        self.scores = SortedList()

    def __contains__(self, val):
        return val in self.mem2score

    def __setitem__(self, val, score):
        self.add(val, score)

    def __getitem__(self, key):
        """Return the score of ``key``; raises ``KeyError`` if absent."""
        return self.mem2score[key]

    def __len__(self):
        return len(self.mem2score)

    def __iter__(self):
        """Iterate over the members in ``(score, member)`` order."""
        return (val for _, val in self.scores)

    def __str__(self):
        # ``__str__`` has to return a string; returning None makes ``str()``
        # raise TypeError.
        return 'ZSet({})'.format(
            [(val, score) for score, val in self.scores])

    def get(self, key, default=None):
        """Return the score of ``key`` or ``default`` if absent."""
        return self.mem2score.get(key, default)

    def add(self, val, score):
        """Add ``val`` with ``score`` or update its score.

        :return: False if ``val`` already had exactly this score, else True
        """
        # Test membership, not truthiness: a stored score of 0 is falsy and
        # would otherwise leave a stale (0, val) entry behind in ``scores``.
        if val in self.mem2score:
            s_prev = self.mem2score[val]
            if s_prev == score:
                return False
            self.scores.remove((s_prev, val))
        self.mem2score[val] = score
        self.scores.add((score, val))
        return True

    def discard(self, key):
        """Remove ``key`` if present; do nothing otherwise."""
        try:
            score = self.mem2score.pop(key)
        except KeyError:
            return
        self.scores.remove((score, key))

    def items(self):
        """Return the ``(member, score)`` items view of ``mem2score``."""
        return self.mem2score.items()

    def rank(self, member):
        """Return the position of ``member`` in score order.

        Raises ``KeyError`` if ``member`` is absent.
        """
        return self.scores.index((self.mem2score[member], member))

    def islice_score(self, start, stop, reverse=False):
        """Return ``self.scores.islice(start, stop, reverse)``."""
        return self.scores.islice(start, stop, reverse)
class MKAverage:
    """Running "MK average" of the last ``m`` numbers of a stream.

    ``deque`` keeps the window in arrival order and ``sortedNums`` keeps it
    sorted.  ``total`` is the window sum; ``first_k`` and ``last_k`` are the
    sums of its ``k`` smallest and ``k`` largest values.
    """

    def __init__(self, m: int, k: int):
        self.m = m
        self.k = k
        self.deque = collections.deque()
        self.sortedNums = SortedList()
        self.total = 0
        self.first_k = 0
        self.last_k = 0

    def addElement(self, num: int) -> None:
        """Add ``num`` and drop the oldest value once the window exceeds ``m``."""
        k = self.k
        ordered = self.sortedNums
        size = len(ordered)

        self.total += num
        self.deque.append(num)
        slot = ordered.bisect_left(num)
        holds_k = size >= k

        # The new value lands among the k smallest: it displaces the value
        # currently at rank k - 1 (if there are k values already).
        if slot < k:
            self.first_k += num
            if holds_k:
                self.first_k -= ordered[k - 1]

        # The new value lands among the k largest: it displaces the value
        # currently k-th from the end.
        if slot > size - k:
            self.last_k += num
            if holds_k:
                self.last_k -= ordered[-k]

        ordered.add(num)

        if len(self.deque) <= self.m:
            return

        # Evict the oldest value and refill whichever edge group it was in.
        oldest = self.deque.popleft()
        self.total -= oldest
        rank = ordered.index(oldest)
        if rank < k:
            self.first_k -= oldest
            self.first_k += ordered[k]
        elif rank >= len(ordered) - k:
            self.last_k -= oldest
            self.last_k += ordered[-(k + 1)]
        ordered.remove(oldest)

    def calculateMKAverage(self) -> int:
        """Return the floored mean of the middle values, or -1 if the window
        holds fewer than ``m`` values.
        """
        if len(self.sortedNums) < self.m:
            return -1
        middle_sum = self.total - self.first_k - self.last_k
        return middle_sum // (self.m - 2 * self.k)
Example #16
0
    def collate_fn(self, interaction):
        """Turn an interaction batch into local node indices plus blocks.

        Item ids are shifted by ``self.num_users``.  The distinct ids of the
        batch are sorted, handed to ``self.collator.collate``, and every
        user / positive / negative id is replaced by its position in that
        sorted list.

        :param interaction: mapping indexed by ``'user_id'``,
            ``self.ITEM_ID`` and, optionally, ``self.NEG_ITEM_ID``
        :return: ``(users, pos, neg, blocks)``; ``blocks`` is the third
            element returned by the collator
        """
        users_orig = interaction['user_id']
        pos_orig = interaction[self.ITEM_ID] + self.num_users
        try:
            neg_orig = interaction[self.NEG_ITEM_ID] + self.num_users
        except Exception:
            # No negative items available: continue with an empty tensor.
            neg_orig = th.zeros((0, ))

        node_ids = set()
        for ids in (users_orig, pos_orig, neg_orig):
            node_ids.update(ids.numpy().tolist())
        unique = SortedList(node_ids)
        all_nodes = th.Tensor(list(unique)).long()

        def to_local(ids):
            # Position of every id inside the sorted list of distinct ids.
            return th.Tensor([unique.index(x) for x in ids]).long()

        users = to_local(users_orig)
        pos = to_local(pos_orig)
        neg = to_local(neg_orig)

        _, _, blocks = self.collator.collate(all_nodes)
        return users, pos, neg, blocks
def fast_generator(rotors: list):
    """Sweep over the rotors' semicircles and yield the intersection pairs.

    For every rotor center a left and a right ``SemiCircle`` are created and
    two events are queued: an ``upper`` event one unit above the center and a
    ``bottom`` event one unit below it.  Events are popped from the heap one
    by one; the status array is updated according to the event type and
    ``refine_intersections`` is called for each event.  Finally everything
    collected in ``intersection_pairs`` is yielded.

    :param rotors: sequence of rotor centers, indexable as ``(x, y)``
    """
    from sortedcontainers import SortedList

    intersection_pairs = set()
    intersections_set = set()
    status_array = SortedList()
    event_points: list = []

    for rotor_center in rotors:
        # The same list object is shared by both events of a rotor.
        affiliations: list = [
            SemiCircle(circle_center=rotor_center, side=side)
            for side in [SemiCircleSide.left, SemiCircleSide.right]
        ]
        top = (rotor_center[0], rotor_center[1] + 1)
        bottom = (rotor_center[0], rotor_center[1] - 1)
        for coordinates, event_type in ((top, EventPointType.upper),
                                        (bottom, EventPointType.bottom)):
            heapq.heappush(
                event_points,
                EventPoint(coordinates=coordinates,
                           affiliations=affiliations,
                           event_type=event_type))

    while event_points:
        event: EventPoint = heapq.heappop(event_points)
        shared.sweep_line_progress = event.coordinates[1]

        if event.event_type == EventPointType.upper:
            status_array.update(event.affiliations)

        if event.event_type == EventPointType.intersection:
            # Take both semicircles out and put them back in.
            for semi_circle in event.affiliations[:2]:
                status_array.discard(semi_circle)
            status_array.update(event.affiliations)

        # Must be looked up before a bottom event removes the semicircles.
        left_semi_circle_position: int = status_array.index(
            min(event.affiliations))

        is_bottom = event.event_type == EventPointType.bottom
        if is_bottom:
            for semi_circle in event.affiliations[:2]:
                status_array.discard(semi_circle)

        refine_intersections(
            intersection_pairs=intersection_pairs,
            intersections_set=intersections_set,
            status_array=status_array,
            event_points=event_points,
            left_semi_circle_position=left_semi_circle_position,
            deletion=is_bottom)

    yield from intersection_pairs
class MKAverage(object):
    """Running "MK average" over the last ``m`` numbers of a stream.

    A deque keeps the window in arrival order and a sorted list keeps it in
    sorted order; the sums of the whole window, of its ``k`` smallest and of
    its ``k`` largest values are updated incrementally.
    """

    def __init__(self, m, k):
        """
        :type m: int
        :type k: int
        """
        self.__m, self.__k = m, k
        self.__dq = collections.deque()
        self.__sl = SortedList()
        self.__total = 0
        self.__first_k = 0
        self.__last_k = 0

    def addElement(self, num):
        """
        :type num: int
        :rtype: None
        """
        window = self.__dq
        if len(window) == self.__m:
            # Window is full: evict the oldest value first.
            self.__remove(window.popleft())
        window.append(num)
        self.__add(num)

    def calculateMKAverage(self):
        """
        :rtype: int
        """
        if len(self.__sl) < self.__m:
            return -1
        middle_sum = self.__total - self.__first_k - self.__last_k
        return middle_sum // (self.__m - 2 * self.__k)

    def __add(self, num):
        """Insert ``num`` into the sorted list and update the three sums."""
        values, k = self.__sl, self.__k
        size = len(values)
        has_k = size >= k

        self.__total += num
        pos = values.bisect_left(num)
        if pos < k:
            self.__first_k += num
            if has_k:
                self.__first_k -= values[k - 1]
        if pos > size - k:
            self.__last_k += num
            if has_k:
                self.__last_k -= values[-k]
        values.add(num)

    def __remove(self, num):
        """Remove ``num`` from the sorted list and update the three sums."""
        values, k = self.__sl, self.__k
        last = len(values) - 1

        self.__total -= num
        pos = values.index(num)
        if pos < k:
            self.__first_k -= num
            self.__first_k += values[k]
        elif pos > last - k:
            self.__last_k -= num
            self.__last_k += values[-1 - k]
        values.remove(num)
Example #19
0
    def to_vector(self, ordering: SortedList) -> State:
        """
        Convert the Side to a State according to the given ordering.

        The vector has one integer entry per element of ``ordering``; the
        entry at an agent's position holds its count from
        ``self.to_counter()`` and all other entries stay zero.

        :param ordering: sequence of complex agents
        :return: State representing vector
        """
        vector = np.zeros(len(ordering), dtype=int)
        counts = self.to_counter()
        for agent in counts:
            position = ordering.index(agent)
            vector[position] = counts[agent]
        return State(vector)
Example #20
0
 def numTeams(self, rating: List[int]) -> int:
     """Count index triples, taking every position as the middle one.

     For position ``i`` the ascending triples are ``smaller-on-the-left *
     larger-on-the-right`` and the descending triples are the product of the
     remaining elements on each side.
     """
     n = len(rating)

     # smaller[i]: how many earlier ratings are lower than rating[i].
     seen = SortedList()
     smaller = []
     for value in rating:
         smaller.append(seen.bisect_left(value))
         seen.add(value)

     # larger[i]: how many later ratings are not lower than rating[i].
     seen = SortedList()
     larger = [0] * n
     for i in range(n - 1, -1, -1):
         value = rating[i]
         larger[i] = len(seen) - seen.bisect_left(value)
         seen.add(value)

     return sum(
         smaller[i] * larger[i] + (i - smaller[i]) * (n - i - 1 - larger[i])
         for i in range(n))
Example #21
0
    def processQueries3(self, queries: List[int], m: int) -> List[int]:
        """Answer "position of value" queries with move-to-front.

        The values ``1..m`` start at slots ``0..m-1``.  For each query the
        current rank of the value's slot is recorded, then the value is moved
        to a fresh slot in front of all others (``-1``, ``-2``, ...).

        :return: the recorded ranks, one per query
        """
        slot_of = {value: value - 1 for value in range(1, m + 1)}
        occupied = SortedList(range(m))

        answers = []
        next_front = -1
        for value in queries:
            slot = slot_of[value]
            answers.append(occupied.index(slot))

            # Move the value to the front.
            occupied.remove(slot)
            occupied.add(next_front)
            slot_of[value] = next_front
            next_front -= 1

        return answers
Example #22
0
 def update_entry(cls, book: SortedList, entry: OrderBookEntry):
     """Apply one order book update to ``book``.

     An amount of zero removes the matching entry (a missing entry is
     ignored).  Otherwise the amount of the matching entry is overwritten,
     or ``entry`` is inserted when ``book`` has no match.
     """
     if Decimal(entry.amount) == Decimal('0'):
         try:
             book.remove(entry)
         except ValueError:
             # Nothing to remove.
             pass
         return

     try:
         idx = book.index(entry)
     except ValueError:
         # No matching entry yet: insert it.
         book.add(entry)
         return

     # Matching entry found: only its amount changes.
     book[idx].amount = entry.amount
Example #23
0
class MySorted:
    """Thin wrapper around a ``SortedList`` stored in ``self.elements``."""

    def __init__(self):
        self.elements = SortedList()

    def index(self, val):
        """Return a position for ``val``.

        Returns 0 when the container is empty or ``val`` is not greater than
        the smallest element, ``len(self)`` when ``val`` is not smaller than
        the largest element, and otherwise the index of ``val``.

        NOTE(review): a value strictly between two stored elements raises
        ``ValueError`` - confirm this is intended.
        """
        if not self.elements or self.elements[0] >= val:
            return 0
        elif self.elements[-1] <= val:
            return len(self.elements)
        return self.elements.index(val)

    def append(self, val):
        """Insert ``val`` at its sorted position."""
        # ``SortedList.append`` is not supported (it raises
        # NotImplementedError); ``add`` is the insertion method.
        self.elements.add(val)

    def __len__(self):
        return len(self.elements)
Example #24
0
class MKAverage:
    """Running "MK average" over the last ``m`` numbers of a stream.

    ``deque`` holds the window in arrival order, ``sl`` holds it sorted.
    ``total``, ``first_k`` and ``last_k`` are the sums of the whole window,
    of its ``k`` smallest and of its ``k`` largest values.
    """

    def __init__(self, m: int, k: int):
        self.m = m
        self.k = k
        self.deque = collections.deque()
        self.sl = SortedList()
        self.total = 0
        self.first_k = 0
        self.last_k = 0

    def addElement(self, num: int) -> None:
        """Add ``num``; evict the oldest value once more than ``m`` are held."""
        self.total += num
        self.deque.append(num)
        self._absorb(num)
        if len(self.deque) > self.m:
            self._expel(self.deque.popleft())

    def _absorb(self, num):
        """Insert ``num`` into ``sl`` and adjust both edge sums."""
        values, k = self.sl, self.k
        size = len(values)
        rank = values.bisect_left(num)
        if rank < k:
            self.first_k += num
            if size >= k:
                self.first_k -= values[k - 1]
        if rank >= size + 1 - k:
            self.last_k += num
            if size >= k:
                self.last_k -= values[-k]
        values.add(num)

    def _expel(self, num):
        """Remove ``num`` from ``sl`` and adjust the total and edge sums."""
        values, k = self.sl, self.k
        self.total -= num
        rank = values.index(num)
        if rank < k:
            self.first_k -= num
            self.first_k += values[k]
        elif rank >= len(values) - k:
            self.last_k -= num
            self.last_k += values[-k - 1]
        values.remove(num)

    def calculateMKAverage(self) -> int:
        """Return the floored mean of the middle values, or -1 while fewer
        than ``m`` values are held.
        """
        if len(self.sl) < self.m:
            return -1
        middle_sum = self.total - self.first_k - self.last_k
        return middle_sum // (self.m - 2 * self.k)
def test_index_valueerror4():
    """``index`` raises ValueError for a value that is not in the list."""
    slt = SortedList([0] * 10, load=4)
    # Assert the error like the sibling tests do, instead of letting it
    # propagate and fail the test.
    with pytest.raises(ValueError):
        slt.index(1)
def test_index_valueerror2():
    """``index`` raises ValueError when the stop bound leaves an empty range."""
    slt = SortedList([0] * 10, load=4)
    # Assert the error like the sibling tests do, instead of letting it
    # propagate and fail the test.
    with pytest.raises(ValueError):
        slt.index(0, 0, -10)
def test_index_valueerror7():
    """Searching ``[0, 10)`` for a value the list lacks raises ValueError."""
    values = [0] * 10
    values.extend([2] * 10)
    sorted_values = SortedList(values)
    sorted_values._reset(4)
    with pytest.raises(ValueError):
        sorted_values.index(1, 0, 10)
def stress_index2(slt):
    """Check ``index(value, start)`` on a list of heavily repeated values.

    The first three values of ``slt`` are repeated 200 times; searching for
    a value from its own position must return that position.
    """
    repeated = SortedList(list(slt)[:3] * 200)
    for position, value in enumerate(repeated):
        assert repeated.index(value, position) == position
Example #29
0
def test_index_valueerror6():
    """A value located before the ``start`` bound is reported as missing."""
    digits = SortedList(range(10))
    digits._reset(4)
    with pytest.raises(ValueError):
        digits.index(3, 5)
Example #30
0
def test_index_valueerror3():
    """A ``start`` bound beyond the ``stop`` bound raises ValueError."""
    zeros = SortedList([0] * 10)
    zeros._reset(4)
    with pytest.raises(ValueError):
        zeros.index(0, 7, 3)
def test_index_valueerror3():
    """``index`` rejects a search range whose start lies after its stop."""
    start, stop = 7, 3
    all_zero = SortedList(0 for _ in range(10))
    all_zero._reset(4)
    with pytest.raises(ValueError):
        all_zero.index(0, start, stop)
def stress_index2(slt):
    """Verify that ``index`` honours ``start`` when values repeat.

    Builds a new list from 200 copies of the first three values of ``slt``
    and checks every position against ``index(value, position)``.
    """
    head = list(slt)[:3]
    dense = SortedList(head * 200)
    position = 0
    for value in dense:
        assert dense.index(value, position) == position
        position += 1
def test_index_valueerror5():
    """Looking up any value in an empty list raises ValueError."""
    empty = SortedList()
    with pytest.raises(ValueError):
        empty.index(1)
Example #34
0
class SweepLine(object):
	'''
	This class represents the vertical sweep line which sweeps over 
	the set of segments in the Bentley-Ottmann algorithm.

	At any moment, it contains a sorted list of all the 
	ComparableSegments which intersect the sweep line at its 
	current position.

	Note that if 2 segments s1 and s2 are overlapping, you cannot 
	assume anything about their order in the sorted list, as 
	s1 < s2 and s1 > s2 are both false.

	Such a sorted list would usually rely on a balanced binary search 
	tree data structure in order to have O(log(N)) insertion, 
	deletion and swapping.

	Instead, I chose to use a SortedList from Grant Jenks' SortedContainers 
	module, which has several advantages that you can discover by browsing
	its page. It allows O(log(N)) insertion, deletion and swapping, and I
	find it to be faster in practice.

	The ordering of the segments depends on the class attribute 
	ComparableSegment.currentX, which several methods below assign 
	before touching the list.
	'''

	def __init__(self):
		'''
		Initializes an empty sweep line.
		'''
		self.l = SortedList()

	def isEmpty(self):
		'''
		Returns true if and only if the sweep line is empty.
		'''
		return len(self.l) == 0

	def addSegment(self, seg):
		'''
		Adds seg to the sweep line, after moving 
		ComparableSegment.currentX to seg.x1.
		'''
		ComparableSegment.currentX = seg.x1 
		self.l.add(seg)

	def removeSegment(self, seg):
		'''
		Removes seg from the sweep line.
		'''
		self.l.remove(seg)

	def belowSegments(self, seg):
		'''
		Returns a list containing:
		- The highest segment s_below contained in the sweep line 
		such that s_below.isBelow(seg)
		- All the segments s contained before s_below in the sweep 
		line such that s.isBelow(s_below) is false (i.e. which have the 
		same y-coordinate at ComparableSegment.currentX and gradient).

		The list is empty if no segment is below seg.
		'''
		res = []
		# i = index of seg
		i = self.l.index(seg)
		# Skips the segments which have the same y-coordinate and gradient
		# as seg, until s_below is found
		while i-1 >= 0:
			prev = self.l[i-1]
			i -= 1
			if prev.isBelow(seg):
				res.append(prev)
				break
		# Appends all the segments which have the same y-coordinate and 
		# gradient as s_below (res[0]); only runs when s_below was found
		# at an index > 0
		while i-1 >= 0:
			prev = self.l[i-1]
			if prev.isBelow(res[0]):
				break
			res.append(prev)
			i -= 1
		return res

	def aboveSegments(self, seg):
		'''
		Returns a list containing:
		- The lowest segment s_above contained in the sweep line 
		such that seg.isBelow(s_above)
		- All the segments s contained after s_above in the sweep 
		line such that s_above.isBelow(s) is false (i.e. which have the 
		same y-coordinate at ComparableSegment.currentX and gradient).

		The list is empty if no segment is above seg.
		'''
		res = []
		# i = index of seg
		i = self.l.index(seg)
		# Skips the segments which have the same y-coordinate and gradient
		# as seg, until s_above is found
		while i+1 < len(self.l):
			succ = self.l[i+1]
			i += 1
			if seg.isBelow(succ):
				res.append(succ)
				break
		# Appends all the segments which have the same y-coordinate and 
		# gradient as s_above (res[0]); only runs when s_above was found
		# before the last index
		while i+1 < len(self.l):
			succ = self.l[i+1]
			if res[0].isBelow(succ):
				break
			res.append(succ)
			i += 1
		return res

	def sameLevelAs(self, seg):
		'''
		Returns a list containing seg itself followed by its neighbours 
		s in the sweep line for which neither seg.isBelow(s) (scanning 
		upwards) nor s.isBelow(seg) (scanning downwards) holds, i.e. 
		the segments with the same y-coordinate at 
		ComparableSegment.currentX and gradient as seg.
		'''
		i = self.l.index(seg)
		res = [self.l[i]]
		# Looks for same level segments above
		j = i + 1
		while j < len(self.l) and not seg.isBelow(self.l[j]):
			res.append(self.l[j])
			j += 1
		# Looks for same level segments below
		j = i - 1
		while j >= 0 and not self.l[j].isBelow(seg):
			res.append(self.l[j])
			j -= 1
		return res

	def betweenY(self, y_inf, y_sup, x):
		'''
		Returns a list of all the segments intersecting the sweep line
		between y-coordinates y_inf and y_sup included, at 
		x-coordinate x. Also sets ComparableSegment.currentX to x.
		'''
		ComparableSegment.currentX = x
		res = []
		i = 0
		# Skips the segments whose y-coordinate is < y_inf
		while i < len(self.l) and self.l[i].yAtX(x) < y_inf:
			i += 1
		# Collects the segments until the first one above y_sup
		while i < len(self.l) and self.l[i].yAtX(x) <= y_sup:
			res.append(self.l[i])
			i += 1

		return res

	def revertOrder(self, x, segments):
		'''
		Reverses the order of the given segments in the sweep line, 
		at x-coordinate x.

		NOTE(review): this assigns items of the SortedList by index; 
		confirm that the installed sortedcontainers version supports 
		item assignment.
		'''
		indices = []
		for seg in segments:
			# Each segment is looked up with currentX at its own x1
			ComparableSegment.currentX = seg.x1
			indices.append(self.l.index(seg))
		# Updates currentX so that the swap keeps the sort order
		ComparableSegment.currentX = x
		# Swaps the segments pairwise, from the outside in
		for i in range(floor(len(segments)/2)):
			i1 = indices[i]
			i2 = indices[-i-1]
			self.l[i1] = segments[-i-1]
			self.l[i2] = segments[i]
def test_index_valueerror5():
    """``index`` raises ValueError on an empty list."""
    slt = SortedList()
    # Assert the error like the sibling tests do, instead of letting it
    # propagate and fail the test.
    with pytest.raises(ValueError):
        slt.index(1)
def test_index_valueerror7():
    """``index`` raises ValueError when the value is absent from the range."""
    slt = SortedList([0] * 10 + [2] * 10, load=4)
    # Assert the error like the sibling tests do, instead of letting it
    # propagate and fail the test.
    with pytest.raises(ValueError):
        slt.index(1, 0, 10)
def test_index_valueerror6():
    """``index`` raises ValueError when the value lies before ``start``."""
    slt = SortedList(range(10), load=4)
    # Assert the error like the sibling tests do, instead of letting it
    # propagate and fail the test.
    with pytest.raises(ValueError):
        slt.index(3, 5)
Example #38
0
class PriorityDict(MutableMapping):
    """
    A PriorityDict provides the same methods as a dict. Additionally, a
    PriorityDict efficiently maintains its keys in value sorted order.
    Consequently, the keys method will return the keys in value sorted order,
    the popitem method will remove the item with the highest value, etc.

    Internally the mapping is stored twice: `_dict` maps key -> value and
    `_list` is a sorted list of ``(value, key)`` pairs.
    """
    def __init__(self, *args, **kwargs):
        """
        A PriorityDict provides the same methods as a dict. Additionally, a
        PriorityDict efficiently maintains its keys in value sorted order.
        Consequently, the keys method will return the keys in value sorted
        order, the popitem method will remove the item with the highest value,
        etc.
        If the first argument is the boolean value False, then it indicates
        that keys are not comparable. By default this setting is True and
        duplicate values are tie-breaked on the key. Using comparable keys
        improves the performance of the PriorityDict. The boolean flag is
        consumed here and is not treated as initial data.
        An optional *iterable* argument provides an initial series of items to
        populate the PriorityDict.  Each item in the sequence must itself
        contain two items. The first is used as a key in the new dictionary,
        and the second as the key's value. If a given key is seen more than
        once, the last value associated with it is retained in the new
        dictionary.
        If keyword arguments are given, the keywords themselves with their
        associated values are added as items to the dictionary. If a key is
        specified both in the positional argument and as a keyword argument, the
        value associated with the keyword is retained in the dictionary. For
        example, these all return a dictionary equal to ``{"one": 2, "two":
        3}``:
        * ``PriorityDict(one=2, two=3)``
        * ``PriorityDict({'one': 2, 'two': 3})``
        * ``PriorityDict(zip(('one', 'two'), (2, 3)))``
        * ``PriorityDict([['two', 3], ['one', 2]])``
        The first example only works for keys that are valid Python
        identifiers; the others work with any valid keys.
        Note that this constructor mimics the Python dict constructor. If
        you're looking for a constructor like collections.Counter(...), see
        PriorityDict.count(...).
        """
        self._dict = dict()

        if len(args) > 0 and isinstance(args[0], bool):
            if args[0]:
                self._list = SortedList()
            else:
                # Keys are not comparable: order on the value only.
                self._list = SortedListWithKey(key=lambda tup: tup[0])
            # The flag is configuration, not data: forwarding it to update()
            # would make dict(True) raise TypeError.
            args = args[1:]
        else:
            self._list = SortedList()

        self.iloc = _IlocWrapper(self)
        self.update(*args, **kwargs)

    def clear(self):
        """Remove all elements from the dictionary."""
        self._dict.clear()
        self._list.clear()

    def clean(self, value=0):
        """
        Remove all items with value less than or equal to `value`.
        Default `value` is 0.
        """
        _list, _dict = self._list, self._dict
        pos = self.bisect_right(value)
        for _, key in _list[:pos]:
            del _dict[key]
        del _list[:pos]

    def __contains__(self, key):
        """Return True if and only if *key* is in the dictionary."""
        return key in self._dict

    def __delitem__(self, key):
        """
        Remove ``d[key]`` from *d*.  Raises a KeyError if *key* is not in the
        dictionary.
        """
        value = self._dict[key]
        self._list.remove((value, key))
        del self._dict[key]

    def __getitem__(self, key):
        """
        Return the priority of *key* in *d*.  Raises a KeyError if *key* is not
        in the dictionary.
        """
        return self._dict[key]

    def __iter__(self):
        """
        Create an iterator over the keys of the dictionary ordered by the value
        sort order.
        """
        return iter(key for value, key in self._list)

    def __reversed__(self):
        """
        Create an iterator over the keys of the dictionary ordered by the
        reversed value sort order.
        """
        return iter(key for value, key in reversed(self._list))

    def __len__(self):
        """Return the number of (key, value) pairs in the dictionary."""
        return len(self._dict)

    def __setitem__(self, key, value):
        """Set `d[key]` to *value*."""
        if key in self._dict:
            # Drop the stale (value, key) pair before inserting the new one.
            old_value = self._dict[key]
            self._list.remove((old_value, key))
        self._list.add((value, key))
        self._dict[key] = value

    def copy(self):
        """Create a shallow copy of the dictionary."""
        result = PriorityDict()
        result._dict = self._dict.copy()
        result._list = self._list.copy()
        result.iloc = _IlocWrapper(result)
        return result

    def __copy__(self):
        """Create a shallow copy of the dictionary."""
        return self.copy()

    @classmethod
    def fromkeys(cls, iterable, value=0):
        """
        Create a new dictionary with keys from `iterable` and values set to
        `value`. The default *value* is 0.
        """
        return cls((key, value) for key in iterable)

    def get(self, key, default=None):
        """
        Return the value for *key* if *key* is in the dictionary, else
        *default*.  If *default* is not given, it defaults to ``None``,
        so that this method never raises a KeyError.
        """
        return self._dict.get(key, default)

    def has_key(self, key):
        """Return True if and only if *key* is in the dictionary."""
        return key in self._dict

    def pop(self, key, default=_NotGiven):
        """
        If *key* is in the dictionary, remove it and return its value,
        else return *default*. If *default* is not given and *key* is not in
        the dictionary, a KeyError is raised.
        """
        if key in self._dict:
            value = self._dict[key]
            self._list.remove((value, key))
            return self._dict.pop(key)
        # Identity test: `==` would call the __eq__ of arbitrary defaults.
        if default is _NotGiven:
            raise KeyError(key)
        return default

    def popitem(self, index=-1):
        """
        Remove and return item at *index* (default: -1). Raises IndexError if
        dict is empty or index is out of range. Negative indices are supported
        as for slice indices.
        """
        value, key = self._list.pop(index)
        del self._dict[key]
        return key, value

    def setdefault(self, key, default=0):
        """
        If *key* is in the dictionary, return its value.  If not, insert *key*
        with a value of *default* and return *default*.  *default* defaults to
        ``0``.
        """
        if key in self._dict:
            return self._dict[key]
        else:
            self._dict[key] = default
            self._list.add((default, key))
            return default

    def elements(self):
        """
        Return an iterator over elements repeating each as many times as its
        count. Elements are returned in value sort-order. If an element's count
        is less than one, elements() will ignore it.
        """
        values = (repeat(key, value) for value, key in self._list)
        return chain.from_iterable(values)

    def most_common(self, count=None):
        """
        Return a list of the `count` highest priority elements with their
        priority. If `count` is not specified, `most_common` returns *all*
        elements in the dict. Elements with equal counts are ordered by key.
        If `count` exceeds the number of elements, all elements are returned.
        """
        _list, _dict = self._list, self._dict

        if count is None:
            return [(key, value) for value, key in reversed(_list)]

        end = len(_dict)
        # Clamp at 0: a negative start would be read as an offset from the
        # end of the list and silently drop elements.
        start = max(end - count, 0)

        return [(key, value) for value, key in reversed(_list[start:end])]

    def subtract(self, elements):
        """
        Elements are subtracted from an iterable or from another mapping (or
        counter). Like dict.update() but subtracts counts instead of replacing
        them. Both inputs and outputs may be zero or negative.
        """
        self -= Counter(elements)

    def tally(self, *args, **kwargs):
        """
        Elements are counted from an iterable or added-in from another mapping
        (or counter). Like dict.update() but adds counts instead of replacing
        them. Also, the iterable is expected to be a sequence of elements, not a
        sequence of (key, value) pairs.
        """
        self += Counter(*args, **kwargs)

    @classmethod
    def count(cls, *args, **kwargs):
        """
        Consume `args` and `kwargs` with a Counter and use that mapping to
        initialize a PriorityDict.
        """
        return cls(Counter(*args, **kwargs))

    def update(self, *args, **kwargs):
        """
        Update the dictionary with the key/value pairs from *other*, overwriting
        existing keys.
        *update* accepts either another dictionary object or an iterable of
        key/value pairs (as a tuple or other iterable of length two).  If
        keyword arguments are specified, the dictionary is then updated with
        those key/value pairs: ``d.update(red=1, blue=2)``.
        """
        _list, _dict = self._list, self._dict

        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
            items = args[0]
        else:
            items = dict(*args, **kwargs)

        if (10 * len(items)) > len(_dict):
            # Large update: rebuilding the sorted list in one go.
            _dict.update(items)
            _list.clear()
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            # Small update: patch the sorted list entry by entry.
            for key, value in iteritems(items):
                if key in _dict:
                    # Only existing keys have a stale pair to remove.
                    _list.remove((_dict[key], key))
                _dict[key] = value
                _list.add((value, key))

    def index(self, key):
        """
        Return the smallest *i* such that `d.iloc[i] == key`.  Raises KeyError
        if *key* is not present.
        """
        value = self._dict[key]
        return self._list.index((value, key))

    def bisect_left(self, value):
        """
        Similar to the ``bisect`` module in the standard library, this returns
        an appropriate index to insert *value* in PriorityDict. If *value* is
        already present in PriorityDict, the insertion point will be before (to
        the left of) any existing entries.
        """
        return self._list.bisect_left((value,))

    def bisect(self, value):
        """Same as bisect_left."""
        return self._list.bisect((value,))

    def bisect_right(self, value):
        """
        Same as `bisect_left`, but if *value* is already present in
        PriorityDict, the insertion point will be after (to the right
        of) any existing entries.
        """
        return self._list.bisect_right((value, _Biggest))

    def __iadd__(self, that):
        """Add values from `that` mapping."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.update(that)
            _list.update((value, key) for key, value in iteritems(_dict))
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    _dict[key] += value
                else:
                    _dict[key] = value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value + value
                _dict[key] = value
                _list.add((value, key))
        return self

    def __isub__(self, that):
        """Subtract values from `that` mapping."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.clear()
            _list.clear()
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    _dict[key] -= value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value - value
                    _dict[key] = value
                    _list.add((value, key))
        return self

    def __ior__(self, that):
        """Or values from `that` mapping (max(v1, v2))."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.update(that)
            _list.update((value, key) for key, value in iteritems(_dict))
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _dict[key] = old_value if old_value > value else value
                else:
                    _dict[key] = value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value if old_value > value else value
                _dict[key] = value
                _list.add((value, key))
        return self

    def __iand__(self, that):
        """And values from `that` mapping (min(v1, v2))."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.clear()
            _list.clear()
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _dict[key] = old_value if old_value < value else value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value if old_value < value else value
                    _dict[key] = value
                    _list.add((value, key))
        return self

    def __add__(self, that):
        """Add values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                _dict[key] += value
            else:
                _dict[key] = value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __sub__(self, that):
        """Subtract values in `that` mapping from this."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                _dict[key] -= value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __or__(self, that):
        """Or values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                old_value = _dict[key]
                _dict[key] = old_value if old_value > value else value
            else:
                _dict[key] = value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __and__(self, that):
        """And values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                old_value = _dict[key]
                _dict[key] = old_value if old_value < value else value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __eq__(self, that):
        """Compare two mappings for equality."""
        if isinstance(that, PriorityDict):
            that = that._dict
        return self._dict == that

    def __ne__(self, that):
        """Compare two mappings for inequality."""
        if isinstance(that, PriorityDict):
            that = that._dict
        return self._dict != that

    def __lt__(self, that):
        """Compare two mappings for less than."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (_dict != that and self <= that)

    def __le__(self, that):
        """Compare two mappings for less than equal."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (len(_dict) <= len(that) and
                all(_dict[key] <= that[key] if key in that else False
                    for key in _dict))

    def __gt__(self, that):
        """Compare two mappings for greater than."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (_dict != that and self >= that)

    def __ge__(self, that):
        """Compare two mappings for greater than equal."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (len(_dict) >= len(that) and
                all(_dict[key] >= that[key] if key in _dict else False
                    for key in that))

    def isdisjoint(self, that):
        """
        Return True if no key in `self` is also in `that`.
        This doesn't check that the value is greater than zero.
        To remove keys with value less than or equal to zero see *clean*.
        """
        return not any(key in self for key in that)

    def items(self):
        """
        Return a list of the dictionary's items (``(key, value)``
        pairs). Items are ordered by their value from least to greatest.
        """
        return list((key, value) for value, key in self._list)

    def iteritems(self):
        """
        Return an iterable over the items (``(key, value)`` pairs) of the
        dictionary. Items are ordered by their value from least to greatest.
        """
        return iter((key, value) for value, key in self._list)

    @not26
    def viewitems(self):
        """
        In Python 2.7 and later, return a new `ItemsView` of the dictionary's
        items. Beware iterating the `ItemsView` as items are unordered.
        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewitems()
        else:
            return self._dict.items()

    def keys(self):
        """
        Return a list of the dictionary's keys. Keys are ordered
        by their corresponding value from least to greatest.
        """
        return list(key for value, key in self._list)

    def iterkeys(self):
        """
        Return an iterable over the keys of the dictionary. Keys are ordered
        by their corresponding value from least to greatest.
        """
        return iter(key for value, key in self._list)

    @not26
    def viewkeys(self):
        """
        In Python 2.7 and later, return a new `KeysView` of the dictionary's
        keys. Beware iterating the `KeysView` as keys are unordered.
        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewkeys()
        else:
            return self._dict.keys()

    def values(self):
        """
        Return a list of the dictionary's values. Values are
        ordered from least to greatest.
        """
        return list(value for value, key in self._list)

    def itervalues(self):
        """
        Return an iterable over the values of the dictionary. Values are
        iterated from least to greatest.
        """
        return iter(value for value, key in self._list)

    @not26
    def viewvalues(self):
        """
        In Python 2.7 and later, return a `ValuesView` of the dictionary's
        values. Beware iterating the `ValuesView` as values are unordered.
        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewvalues()
        else:
            return self._dict.values()

    def __repr__(self):
        """Return a string representation of PriorityDict."""
        return 'PriorityDict({0})'.format(repr(dict(self)))

    def _check(self):
        """Assert that `_dict` and `_list` describe the same mapping."""
        self._list._check()
        assert len(self._dict) == len(self._list)
        assert all(key in self._dict and self._dict[key] == value
                   for value, key in self._list)
Example #39
0
def test_index_valueerror5():
    """index() on an empty SortedList is expected to raise ValueError."""
    empty_list = SortedList()
    with pytest.raises(ValueError):
        empty_list.index(1)
Example #40
0
def prev_workspace_for_current_output_num():
    """Return the workspace number that precedes the current one.

    The numbers of the workspaces reported for the current output are
    sorted; the one just before the current workspace's number is
    returned, wrapping around from the first number to the last.
    """
    current, on_output = workspaces_for_current_output()
    numbers = SortedList(entry["num"] for entry in on_output)
    position = numbers.index(current["num"])
    # The modulo wraps position -1 around to the last number.
    previous_position = (position - 1) % len(numbers)
    return numbers[previous_position]
Example #41
0
def test_index_valueerror7():
    """index() must raise ValueError for a value the list does not hold."""
    values = [0] * 10 + [2] * 10
    slt = SortedList(values)
    slt._reset(4)
    with pytest.raises(ValueError):
        # 1 is absent, so the lookup within [0, 10) has to fail.
        slt.index(1, 0, 10)
Example #42
0
class UrlClusterizedQueue:
    """
    Asynchronous URL queue that tries to serve URLs spread over clusters.

    While fewer than `_min_urls_count` URLs are known, URLs are served in
    insertion order. Afterwards the known URLs are clustered on their
    extracted features and a small sub-queue is filled by repeatedly taking
    an unused URL from the cluster with the fewest used URLs.
    """

    def __init__(self):
        self._features_count_list = SortedList()  # list of pairs (feature_count, feature_name)
        self._features_count_dict = dict()  # feature_name -> feature_count

        self._clusterizer = DBSCAN(metric='jaccard')
        # Only features whose count lies between these fractions of the
        # number of URLs are used for clustering.
        self._min_freq = 0.1
        self._max_freq = 0.9

        self._urls = dict()  # url -> [features, is_used]
        self._urls_keys = []  # urls in insertion order
        self._index = -1  # position of the last url served by the fallback
        self._min_urls_count = 50

        self._subqueue = Queue()
        self._subqueue_len = 4

        # constants: positions inside a self._urls entry
        self._i_is_used = 1
        self._i_features = 0

        # constants: positions inside a per-cluster bucket
        self._i_list_for_unused = 0
        self._i_list_for_used = 1
        self._i_list_total = 2

    def _return_url(self, url):
        """Mark *url* as used and return it."""
        self._urls[url][self._i_is_used] = True
        return url

    def _next_queue_fallback(self):
        """
        Return the next url in insertion order and mark it as used.
        Raises IndexError when every url has already been served this way.
        """
        self._index += 1
        url = self._urls_keys[self._index]
        return self._return_url(url)

    def _run_clustering(self):
        """
        Cluster the known urls and refill the sub-queue.

        Up to `_subqueue_len` unused urls are put into the sub-queue. Nothing
        is queued when no feature has a suitable frequency or when no unused
        url is left; the caller has to cope with an empty sub-queue.
        """
        print("try to use clustering")
        # here we need to run clustering
        # first of all we need to choose features
        url_count = len(self._urls)
        max_feature_count = int(self._max_freq * url_count)
        min_feature_count = int(self._min_freq * url_count)
        start_index = bisect.bisect_left(self._features_count_list, (min_feature_count, ''))
        # '' sorts before every other string, so (max + 1, '') lands right
        # after the last (max, name) pair whatever the feature name is.
        end_index = bisect.bisect_left(self._features_count_list, (max_feature_count + 1, ''))
        if start_index >= end_index:
            print("not enough features")
            return

        chosen_features = SortedSet()
        for i in range(start_index, end_index):
            chosen_features.add(self._features_count_list[i][1])

        # then we need to build features matrix: X[i][j] is 1 when url i has
        # chosen feature j, else 0
        X = np.zeros((url_count, len(chosen_features)))
        for i in range(url_count):
            features = self._urls[self._urls_keys[i]][self._i_features]
            for j, fname in enumerate(chosen_features):
                if fname in features:
                    X[i][j] = 1

        # now we can run clustering
        y = self._clusterizer.fit_predict(X)

        # and we need to create uniform distributed queue
        def new_cluster_bucket():
            # [unused urls, used urls, total count], indexed by
            # _i_list_for_unused, _i_list_for_used and _i_list_total
            return [set(), set(), 0]

        url_in_cluster = defaultdict(new_cluster_bucket)
        for i in range(len(y)):
            url = self._urls_keys[i]
            if self._urls[url][self._i_is_used]:
                url_in_cluster[y[i]][self._i_list_for_used].add(url)
            else:
                url_in_cluster[y[i]][self._i_list_for_unused].add(url)
            url_in_cluster[y[i]][self._i_list_total] += 1

        limit = self._subqueue_len
        # Sorted by decreasing number of used urls, so pop() yields the
        # cluster with the fewest used urls.
        cluster_keys = SortedKeyList(url_in_cluster.keys(), key=lambda x: -len(url_in_cluster[x][self._i_list_for_used]))
        while limit > 0 and len(cluster_keys) > 0:  # Todo: optimize
            less_index = cluster_keys.pop()
            unused_urls = url_in_cluster[less_index][self._i_list_for_unused]
            if len(unused_urls) > 0:
                url = unused_urls.pop()
                self._subqueue.put(url)
                limit -= 1

                if len(unused_urls) > 0:
                    # The cluster still has candidates: account for the url
                    # just taken and re-insert the cluster under its new key.
                    url_in_cluster[less_index][self._i_list_for_used].add(url)
                    cluster_keys.add(less_index)

    async def _run_and_wait_clustering(self):
        """
        Run the clustering in a worker thread and wait for it to finish.

        Exceptions raised by the clustering propagate to the awaiting caller.
        """
        # Thread.run() would execute the clustering synchronously and block
        # the event loop; the default executor runs it off the loop thread.
        # NOTE(review): state shared with put() is not locked - confirm that
        # put() is not expected to run while a clustering is in progress.
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(None, self._run_clustering)

    async def get(self):
        """
        Return the next url to process and mark it as used.

        Falls back to insertion order while too few urls are known or when
        the clustering could not queue any url.
        """
        if len(self._urls) < self._min_urls_count:
            return self._next_queue_fallback()

        if self._subqueue.empty():
            await self._run_and_wait_clustering()
        # if self._subqueue.qsize() == 1:
        #     asyncio.create_task(self._run_and_wait_clustering())
        if self._subqueue.empty():
            # A blocking Queue.get() on an empty queue would never return.
            return self._next_queue_fallback()
        return self._return_url(self._subqueue.get())

    async def empty(self):
        """Return True when the insertion-order cursor has passed every url."""
        return self._index + 1 >= len(self._urls)

    async def put(self, url):
        """
        Register *url* and update the feature counts; known urls are ignored.
        """
        if url in self._urls:
            return

        features = url_features.extract(url)
        for fname in features:
            if fname in self._features_count_dict:
                fcount = self._features_count_dict[fname]
                # The pair is re-inserted below with the incremented count.
                self._features_count_list.remove((fcount, fname))
            else:
                fcount = 0
            fcount += 1
            self._features_count_dict[fname] = fcount
            self._features_count_list.add((fcount, fname))

        self._urls[url] = [features, False]  # False is for used
        self._urls_keys.append(url)
Example #43
0
class Timeline:
    """
    Ordered set of segments.

    A timeline can be seen as an ordered set of non-empty segments (Segment).
    Segments can overlap -- though adding an already existing segment to a
    timeline does nothing.

    Parameters
    ----------
    segments : Segment iterator, optional
        initial set of (non-empty) segments
    uri : string, optional
        name of segmented resource

    Returns
    -------
    timeline : Timeline
        New timeline
    """

    @classmethod
    def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None) -> 'Timeline':
        """Build a timeline from a dataframe

        Parameters
        ----------
        df : pd.DataFrame
            Dataframe whose `PYANNOTE_SEGMENT` column holds the segments.
        uri : string, optional
            Name of segmented resource.

        Returns
        -------
        timeline : Timeline
            New timeline made of the segments found in `df`.
        """
        return cls(segments=list(df[PYANNOTE_SEGMENT]), uri=uri)

    def __init__(self,
                 segments: Optional[Iterable[Segment]] = None,
                 uri: str = None):
        if segments is None:
            segments = ()

        # set of segments  (used for checking inclusion)
        segments_set = set(segments)

        if any(not segment for segment in segments_set):
            raise ValueError('Segments must not be empty.')

        self.segments_set_ = segments_set

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary for segment in segments_set for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        # path to (or any identifier of) segmented resource
        self.uri: str = uri

    def __len__(self):
        """Number of segments

        >>> len(timeline)  # timeline contains three segments
        3
        """
        return len(self.segments_set_)

    def __nonzero__(self):
        return self.__bool__()

    def __bool__(self):
        """Emptiness

        >>> if timeline:
        ...    # timeline is empty
        ... else:
        ...    # timeline is not empty
        """
        return len(self.segments_set_) > 0

    def __iter__(self) -> Iterable[Segment]:
        """Iterate over segments (in chronological order)

        >>> for segment in timeline:
        ...     # do something with the segment

        See also
        --------
        :class:`pyannote.core.Segment` describes how segments are sorted.
        """
        return iter(self.segments_list_)

    def __getitem__(self, k: int) -> Segment:
        """Get segment by index (in chronological order)

        >>> first_segment = timeline[0]
        >>> penultimate_segment = timeline[-2]
        """
        return self.segments_list_[k]

    def __eq__(self, other: 'Timeline'):
        """Equality

        Two timelines are equal if and only if their segments are equal.

        >>> timeline1 = Timeline([Segment(0, 1), Segment(2, 3)])
        >>> timeline2 = Timeline([Segment(2, 3), Segment(0, 1)])
        >>> timeline3 = Timeline([Segment(2, 3)])
        >>> timeline1 == timeline2
        True
        >>> timeline1 == timeline3
        False
        """
        return self.segments_set_ == other.segments_set_

    def __ne__(self, other: 'Timeline'):
        """Inequality"""
        return self.segments_set_ != other.segments_set_

    def index(self, segment: Segment) -> int:
        """Get index of (existing) segment

        Parameters
        ----------
        segment : Segment
            Segment that is being looked for.

        Returns
        -------
        position : int
            Index of `segment` in timeline

        Raises
        ------
        ValueError if `segment` is not present.
        """
        return self.segments_list_.index(segment)

    def add(self, segment: Segment) -> 'Timeline':
        """Add a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being added

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline already contains this segment, it will not be added
        again, as a timeline is meant to be a **set** of segments (not a list).

        If the segment is empty, it will not be added either, as a timeline
        only contains non-empty segments.
        """

        segments_set_ = self.segments_set_
        if segment in segments_set_ or not segment:
            return self

        segments_set_.add(segment)

        self.segments_list_.add(segment)

        segments_boundaries_ = self.segments_boundaries_
        segments_boundaries_.add(segment.start)
        segments_boundaries_.add(segment.end)

        return self

    def remove(self, segment: Segment) -> 'Timeline':
        """Remove a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being removed

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline does not contain this segment, this does nothing
        """

        segments_set_ = self.segments_set_
        if segment not in segments_set_:
            return self

        segments_set_.remove(segment)

        self.segments_list_.remove(segment)

        segments_boundaries_ = self.segments_boundaries_
        segments_boundaries_.remove(segment.start)
        segments_boundaries_.remove(segment.end)

        return self

    def discard(self, segment: Segment) -> 'Timeline':
        """Same as `remove`

        See also
        --------
        :func:`pyannote.core.Timeline.remove`
        """
        return self.remove(segment)

    def __ior__(self, timeline: 'Timeline') -> 'Timeline':
        return self.update(timeline)

    def update(self, timeline: Segment) -> 'Timeline':
        """Add every segments of an existing timeline (in place)

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        self : Timeline
            Updated timeline

        Note
        ----
        Only segments that do not already exist will be added, as a timeline is
        meant to be a **set** of segments (not a list).

        """

        segments_set = self.segments_set_

        segments_set |= timeline.segments_set_

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary for segment in segments_set for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        return self

    def __or__(self, timeline: 'Timeline') -> 'Timeline':
        return self.union(timeline)

    def union(self, timeline: 'Timeline') -> 'Timeline':
        """Create new timeline made of union of segments

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        union : Timeline
            New timeline containing the segments of both timelines. It uses
            the `uri` of the timeline this method is called on.

        Note
        ----
        Unlike timeline.update(...), neither of the two original timelines
        is modified.
        """
        merged_segments = self.segments_set_ | timeline.segments_set_
        return Timeline(segments=merged_segments, uri=self.uri)

    def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]:
        """Iterate over pairs of intersecting segments

        >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)])
        >>> for segment1, segment2 in timeline1.co_iter(timeline2):
        ...     print(segment1, segment2)
        (<Segment(0, 2)>, <Segment(1, 3)>)
        (<Segment(1, 2)>, <Segment(1, 3)>)
        (<Segment(3, 4)>, <Segment(3, 5)>)

        Parameters
        ----------
        other : Timeline
            Second timeline

        Returns
        -------
        iterable : (Segment, Segment) iterable
            Yields pairs (segment of this timeline, segment of `other`) that
            intersect, following the sorted order of this timeline's segments.
        """

        for segment in self.segments_list_:

            # zero-length segment located at the end of 'segment': used as the
            # upper bound of the range of candidates looked up in 'other'
            upper_bound = Segment(start=segment.end, end=segment.end)
            candidates = other.segments_list_.irange(maximum=upper_bound)
            for candidate in candidates:
                if not segment.intersects(candidate):
                    continue
                yield segment, candidate

    def crop_iter(self,
                  support: Support,
                  mode: CropMode = 'intersection',
                  returns_mapping: bool = False) \
            -> Iterator[Union[Tuple[Segment, Segment], Segment]]:
        """Like `crop` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.crop`
        """

        if mode not in {'loose', 'strict', 'intersection'}:
            raise ValueError("Mode must be one of 'loose', 'strict', or "
                             "'intersection'.")

        if not isinstance(support, (Segment, Timeline)):
            raise TypeError("Support must be a Segment or a Timeline.")

        if isinstance(support, Segment):
            # corner case where "support" is empty
            if support:
                segments = [support]
            else:
                segments = []

            support = Timeline(segments=segments, uri=self.uri)
            for yielded in self.crop_iter(support, mode=mode,
                                          returns_mapping=returns_mapping):
                yield yielded
            return

        # loose mode
        if mode == 'loose':
            for segment, _ in self.co_iter(support):
                yield segment
            return

        # strict mode
        if mode == 'strict':
            for segment, other_segment in self.co_iter(support):
                if segment in other_segment:
                    yield segment
            return

        # intersection mode
        for segment, other_segment in self.co_iter(support):
            mapped_to = segment & other_segment
            if not mapped_to:
                continue
            if returns_mapping:
                yield segment, mapped_to
            else:
                yield mapped_to

    def crop(self,
             support: Support,
             mode: CropMode = 'intersection',
             returns_mapping: bool = False) \
            -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]:
        """Crop timeline to new support

        Parameters
        ----------
        support : Segment or Timeline
            Support the timeline is cropped to.
        mode : {'strict', 'loose', 'intersection'}, optional
            Controls how segments that are not fully included in `support` are
            handled. 'strict' mode only keeps fully included segments. 'loose'
            mode keeps any intersecting segment. 'intersection' mode keeps any
            intersecting segment but replace them by their actual intersection.
        returns_mapping : bool, optional
            In 'intersection' mode, also return a dictionary whose keys are
            segments of the cropped timeline, and values are lists of the
            original segments that were cropped. Ignored in other modes.
            Defaults to False.

        Returns
        -------
        cropped : Timeline
            Cropped timeline
        mapping : dict
            Only in 'intersection' mode when 'returns_mapping' is True:
            dictionary whose keys are segments of 'cropped', and values are
            lists of corresponding original segments.

        Examples
        --------

        >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline.crop(Segment(1, 3))
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='loose')
        <Timeline(uri=None, segments=[<Segment(0, 2)>, <Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='strict')
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True)
        >>> print(mapping)
        {<Segment(1, 2)>: [<Segment(0, 2)>, <Segment(1, 2)>]}

        """

        with_mapping = mode == 'intersection' and returns_mapping
        if not with_mapping:
            return Timeline(segments=self.crop_iter(support, mode=mode),
                            uri=self.uri)

        cropped, mapping = [], {}
        pairs = self.crop_iter(support,
                               mode='intersection',
                               returns_mapping=True)
        for original, mapped_to in pairs:
            cropped.append(mapped_to)
            # group original segments by the cropped segment they map to
            mapping.setdefault(mapped_to, []).append(original)
        return Timeline(segments=cropped, uri=self.uri), mapping

    def overlapping(self, t: float) -> List[Segment]:
        """Get list of segments overlapping `t`

        Parameters
        ----------
        t : float
            Timestamp, in seconds.

        Returns
        -------
        segments : list
            List of all segments of timeline containing time t
        """
        return list(self.overlapping_iter(t))

    def overlapping_iter(self, t: float) -> Iterator[Segment]:
        """Like `overlapping` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.overlapping`
        """
        # zero-length segment located at `t`: used as the upper bound of the
        # range of candidate segments
        probe = Segment(start=t, end=t)
        for candidate in self.segments_list_.irange(maximum=probe):
            if candidate.overlaps(t):
                yield candidate

    def __str__(self):
        """Human-readable representation

        >>> timeline = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> print(timeline)
        [[ 00:00:00.000 -->  00:00:10.000]
         [ 00:00:01.000 -->  00:00:13.370]]

        """

        n = len(self.segments_list_)
        string = "["
        for i, segment in enumerate(self.segments_list_):
            string += str(segment)
            string += "\n " if i + 1 < n else ""
        string += "]"
        return string

    def __repr__(self):
        """Computer-readable representation

        >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        <Timeline(uri=None, segments=[<Segment(0, 10)>, <Segment(1, 13.37)>])>

        """

        return "<Timeline(uri=%s, segments=%s)>" % (self.uri,
                                                    list(self.segments_list_))

    def __contains__(self, included: Union[Segment, 'Timeline']):
        """Inclusion

        Check whether every segment of `included` does exist in timeline.

        Parameters
        ----------
        included : Segment or Timeline
            Segment or timeline being checked for inclusion

        Returns
        -------
        contains : bool
            True if every segment in `included` exists in timeline,
            False otherwise

        Raises
        ------
        TypeError if `included` is neither a Segment nor a Timeline.

        Examples
        --------
        >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> timeline2 = Timeline(segments=[Segment(0, 10)])
        >>> timeline1 in timeline2
        False
        >>> timeline2 in timeline1
        True
        >>> Segment(1, 13.37) in timeline1
        True

        """

        own_segments = self.segments_set_

        if isinstance(included, Segment):
            return included in own_segments

        if isinstance(included, Timeline):
            return included.segments_set_.issubset(own_segments)

        raise TypeError(
            'Checking for inclusion only supports Segment and '
            'Timeline instances')

    def empty(self) -> 'Timeline':
        """Return an empty copy

        Returns
        -------
        empty : Timeline
            New timeline with no segment and the same 'uri' attribute.

        """
        blank = Timeline(uri=self.uri)
        return blank

    def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \
            -> 'Timeline':
        """Get a copy of the timeline

        If `segment_func` is provided, the copy is made of the result of
        applying it to each segment.

        Parameters
        ----------
        segment_func : callable, optional
            Callable that takes a segment as input, and returns a segment.
            Defaults to identity function (segment_func(segment) = segment)

        Returns
        -------
        timeline : Timeline
            Copy of the timeline

        """

        segments = self.segments_list_

        # when provided, transform each segment before building the copy
        if segment_func is not None:
            segments = [segment_func(segment) for segment in segments]

        return Timeline(segments=segments, uri=self.uri)

    def extent(self) -> Segment:
        """Extent

        The extent of a timeline is the segment of minimum duration that
        contains every segments of the timeline. It is unique, by definition.
        The extent of an empty timeline is an empty segment.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.extent()
            |--------------------------------|

        Returns
        -------
        extent : Segment
            Timeline extent

        Examples
        --------
        >>> timeline = Timeline(segments=[Segment(0, 1), Segment(9, 10)])
        >>> timeline.extent()
        <Segment(0, 10)>

        """
        if not self.segments_set_:
            # empty timeline: segment going from +inf to -inf
            import numpy as np
            return Segment(start=np.inf, end=-np.inf)

        # boundaries are kept sorted: first is the smallest, last the largest
        boundaries = self.segments_boundaries_
        return Segment(start=boundaries[0], end=boundaries[-1])

    def support_iter(self) -> Iterator[Segment]:
        """Like `support` but returns a segment generator instead

        See also
        --------
        :func:`pyannote.core.Timeline.support`
        """

        # The support of an empty timeline is an empty timeline.
        if not self:
            return

        # Principle:
        #   * gather all segments with no gap between them
        #   * add one segment per resulting group (their union |)
        # Note:
        #   Since segments are kept sorted internally,
        #   there is no need to perform an exhaustive segment clustering.
        #   We just have to consider them in their natural order.

        # Initialize new support segment
        # as very first segment of the timeline
        new_segment = self.segments_list_[0]

        for segment in self:

            # If there is no gap between new support segment and next segment,
            if not (segment ^ new_segment):
                # Extend new support segment using next segment
                new_segment |= segment

            # If there actually is a gap,
            else:
                yield new_segment

                # Initialize new support segment as next segment
                # (right after the gap)
                new_segment = segment

        # Add new segment to the timeline support
        yield new_segment

    def support(self) -> 'Timeline':
        """Timeline support

        The support of a timeline is the timeline with the minimum number of
        segments with exactly the same time span as the original timeline. It
        is (by definition) unique and does not contain any overlapping
        segments.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.support()
            |------|  |--------|  |----------|

        Returns
        -------
        support : Timeline
            New timeline built from the segments of `support_iter`, with the
            same `uri`.
        """
        merged_segments = self.support_iter()
        return Timeline(segments=merged_segments, uri=self.uri)

    def duration(self) -> float:
        """Timeline duration

        The timeline duration is the sum of the durations of the segments
        in the timeline support.

        Returns
        -------
        duration : float
            Duration of timeline support, in seconds.
        """

        # The timeline duration is the sum of the durations
        # of the segments in the timeline support.
        return sum(s.duration for s in self.support_iter())

    def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]:
        """Like `gaps` but returns a segment generator instead

        Raises
        ------
        TypeError if `support` is neither None, a Segment nor a Timeline.

        See also
        --------
        :func:`pyannote.core.Timeline.gaps`

        """

        # default support is the timeline extent
        if support is None:
            support = self.extent()

        if not isinstance(support, (Segment, Timeline)):
            raise TypeError("unsupported operand type(s) for -':"
                            "%s and Timeline." % type(support).__name__)

        # timeline support: yield the gaps found within each segment
        # of the support of the provided timeline
        if isinstance(support, Timeline):
            for segment in support.support():
                yield from self.gaps_iter(support=segment)
            return

        # segment support

        # `cursor` stores the end time of the previously visited segment;
        # it starts at the beginning of the provided segment `support`
        cursor = support.start

        # visit the support of the part of the timeline within `support`
        for covered in self.crop(support, mode='intersection').support():

            # gap between previous and current segment
            # (an empty gap means there is no gap: it is skipped)
            gap = Segment(start=cursor, end=covered.start)
            if gap:
                yield gap

            cursor = covered.end

        # final gap, up to the end of `support` (if not empty)
        last_gap = Segment(start=cursor, end=support.end)
        if last_gap:
            yield last_gap

    def gaps(self, support: Optional[Support] = None) \
            -> 'Timeline':
        """Gaps

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.gaps()
                   |--|        |--|

        Parameters
        ----------
        support : None, Segment or Timeline
            Support in which gaps are looked for. Defaults to timeline extent

        Returns
        -------
        gaps : Timeline
            New timeline (same `uri`) made of all gaps from original timeline,
            and delimited by provided support

        See also
        --------
        :func:`pyannote.core.Timeline.extent`

        """
        gap_segments = self.gaps_iter(support=support)
        return Timeline(segments=gap_segments, uri=self.uri)

    def segmentation(self) -> 'Timeline':
        """Segmentation

        Create the unique timeline with same support and same set of segment
        boundaries as original timeline, but with no overlapping segments.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.segmentation()
            |-|--|-|  |-|---|--|  |--|----|--|

        Returns
        -------
        timeline : Timeline
            (unique) timeline with same support and same set of segment
            boundaries as original timeline, but with no overlapping segments.
        """
        # get all (distinct) boundaries, sorted
        # |------|    |------|     |----|
        #   |--|    |-----|     |----------|
        # becomes
        # | |  | |  | |   |  |  |  |    |  |
        timestamps = sorted({boundary
                             for (start, end) in self
                             for boundary in (start, end)})

        # empty timeline: nothing to segment
        if not timestamps:
            return Timeline(uri=self.uri)

        support = self.support()

        # create new partition timeline
        # | |  | |  | |   |  |  |  |    |  |
        # becomes
        # |-|--|-|  |-|---|--|  |--|----|--|
        segments = []
        for start, end in zip(timestamps, timestamps[1:]):
            # only keep segments that are covered by original timeline:
            # their middle must be overlapped by a segment of the support
            segment = Segment(start=start, end=end)
            if segment and support.overlapping(segment.middle):
                segments.append(segment)

        return Timeline(segments=segments, uri=self.uri)

    def to_annotation(self,
                      generator: Union[str, Iterable[Label], None] = 'string',
                      modality: Optional[str] = None) \
            -> 'Annotation':
        """Turn timeline into an annotation

        Each segment is given the next label produced by `generator`.

        Parameters
        ----------
        generator : 'string', 'int', or iterable, optional
            If 'string' (default) generate string labels. If 'int', generate
            integer labels. If iterable (or iterator), use it to generate
            labels: it must provide at least one label per segment.
        modality : str, optional
            Passed as is to the `Annotation` constructor.

        Returns
        -------
        annotation : Annotation
            Annotation
        """

        from .annotation import Annotation
        annotation = Annotation(uri=self.uri, modality=modality)
        if generator == 'string':
            from .utils.generators import string_generator
            generator = string_generator()
        elif generator == 'int':
            from .utils.generators import int_generator
            generator = int_generator()

        # `next` only works on iterators: plain iterables (list, tuple, ...)
        # are turned into one first, objects that already implement
        # `__next__` are used as they are.
        labels = generator if hasattr(generator, '__next__') else iter(generator)

        for segment in self:
            annotation[segment] = next(labels)

        return annotation

    def write_uem(self, file: TextIO):
        """Dump timeline to file using UEM format

        Parameters
        ----------
        file : file object
        
        Usage
        -----
        >>> with open('file.uem', 'w') as file:
        ...    timeline.write_uem(file)
        """

        uri = self.uri if self.uri else "<NA>"

        for segment in self:
            line = f"{uri} 1 {segment.start:.3f} {segment.end:.3f}\n"
            file.write(line)

    def for_json(self):
        """Serialization

        Returns a dictionary holding the class name, the serialized segments
        and, when it is set, the `uri`.

        See also
        --------
        :mod:`pyannote.core.json`
        """

        data = {
            PYANNOTE_JSON: self.__class__.__name__,
            PYANNOTE_JSON_CONTENT: [segment.for_json() for segment in self],
        }

        # `uri` is only serialized when it is set
        if self.uri:
            data[PYANNOTE_URI] = self.uri

        return data

    @classmethod
    def from_json(cls, data):
        """Deserialization

        Builds a timeline from a dictionary such as the one returned by
        `for_json`. A missing `uri` entry results in `uri=None`.

        See also
        --------
        :mod:`pyannote.core.json`
        """

        uri = data.get(PYANNOTE_URI)
        serialized_segments = data[PYANNOTE_JSON_CONTENT]
        segments = [Segment.from_json(item) for item in serialized_segments]
        return cls(segments=segments, uri=uri)

    def _repr_png_(self):
        """IPython notebook support

        Delegates to `repr_timeline`, imported lazily from the notebook module.

        See also
        --------
        :mod:`pyannote.core.notebook`
        """

        from .notebook import repr_timeline
        rendered = repr_timeline(self)
        return rendered
def test_index_valueerror6():
    """`index` raises ValueError when the value lies before the `start` bound.

    The list holds 0..9, so 3 sits at position 3: it cannot be found when the
    search starts at position 5.
    """
    sorted_values = SortedList(range(10))
    # NOTE(review): `_reset(4)` presumably shrinks the internal load so that
    # the ten values are spread over several sub-lists -- confirm
    sorted_values._reset(4)
    with pytest.raises(ValueError):
        sorted_values.index(3, 5)