def buildPages(self):
    """Group the text fragments in ``self.offset`` into pages.

    A divider is placed at every multiple of 280 below ``280 * n_pages``.
    The dividers are merged with the keys of ``self.offset`` into one sorted
    index list, which is then cut at the dividers.  The fragments of each page
    are joined with single spaces and stored in ``self.pagination``
    (a ``SortedDict`` mapping page number -> text).

    The key immediately preceding the first divider is deliberately pushed to
    the following page (the ``- 1`` on the slice bounds), matching the
    behaviour asserted by the accompanying pagination test.
    """
    dividers = [n * 280 for n in range(1, self.n_pages)]
    indices = SortedList(
        set(dividers + [key for key in self.offset.keys()]))
    last_page = self.n_pages - 1

    pages = SortedDict()
    for i in range(self.n_pages):
        if not dividers:
            # Single page: there is no divider to cut at.
            page = list(indices)
        elif i == 0:
            # max() keeps the bound from wrapping around to -1 when the
            # divider happens to be the very first index.
            stop = max(indices.index(dividers[0]) - 1, 0)
            page = indices[0:stop]
        elif i == last_page:
            start = max(indices.index(dividers[i - 1]) - 1, 0)
            page = indices[start:]
        else:
            page = indices[indices.index(dividers[i - 1]):
                           indices.index(dividers[i])]
        # Drop dividers that are not real offsets.  Build a new list rather
        # than removing in place, which would skip elements mid-iteration.
        pages[i] = [key for key in page if key in self.offset]

    pagination = SortedDict()
    for i in range(self.n_pages):
        pagination[i] = ' '.join(self.offset[key] for key in pages[i])
    self.pagination = pagination
def get_table_structure(cells):
    """Arrange *cells* on a grid derived from their corner coordinates.

    Every distinct ``y`` (resp. ``x``) found on a cell's ``top_left`` or
    ``bottom_right`` corner becomes a row (resp. column) line.  Each cell is
    wrapped in a ``TableCell`` carrying its row/column number and span, and
    the cells of each row are ordered by ``column_no``.

    Returns a ``TableStructure`` built from the rows, the row count and the
    column count.
    """
    ys, xs = set(), set()
    for cell in cells:
        ys.update((cell.top_left.y, cell.bottom_right.y))
        xs.update((cell.top_left.x, cell.bottom_right.x))
    row_lines = SortedList(ys)
    col_lines = SortedList(xs)
    n_rows = len(row_lines) - 1
    n_cols = len(col_lines) - 1

    rows = [[] for _ in range(n_rows)]
    for cell in cells:
        top = row_lines.index(cell.top_left.y)
        left = col_lines.index(cell.top_left.x)
        height = row_lines.index(cell.bottom_right.y) - top
        width = col_lines.index(cell.bottom_right.x) - left
        rows[top].append(TableCell(cell, top, left, height, width))

    for row in rows:
        row.sort(key=lambda c: c.column_no)
    return TableStructure(rows, n_rows, n_cols)
def coincident_indices(list0, list1, delta):
    """Pair each time in ``list0`` with the nearest time in ``list1``.

    Both inputs are sorted first; the returned indices refer to positions in
    those *sorted* sequences.  A time from ``list0`` is only paired when
    ``list1`` holds at least one time within ``delta`` of it (inclusive).

    Parameters
    ----------
    list0 : list
        List of times
    list1 : list
        List of times (preferably longer than ``list0``)
    delta : float
        Half-width of the search window around each time of ``list0``

    Returns
    -------
    coincidents : dict
        {index_list0: index_list1}
    """
    sorted0 = SortedList(list0)
    sorted1 = SortedList(list1)
    matches = {}
    for t0 in sorted0:
        candidates = list(sorted1.irange(t0 - delta, t0 + delta))
        if not candidates:
            continue
        gaps = [abs(t0 - t1) for t1 in candidates]
        nearest = candidates[np.argmin(gaps)]
        matches[sorted0.index(t0)] = sorted1.index(nearest)
    return matches
def test_pagination(self):
    """Replay the pagination algorithm on fixture ``t3`` and check each step."""
    print('test3')
    t = t3
    dividers = [n * 280 for n in range(1, t.n_pages)]
    self.assertEqual(len(dividers), 1)
    self.assertEqual(dividers[0], 280)
    indices = SortedList(set(dividers + [key for key in t.offset.keys()]))
    self.assertEqual(
        indices,
        SortedList([
            0, 4, 8, 17, 20, 26, 34, 44, 56, 65, 70, 79, 84, 88, 93, 102,
            109, 124, 137, 142, 146, 155, 165, 171, 175, 179, 188, 192, 203,
            213, 226, 230, 235, 239, 249, 252, 256, 262, 272, 280
        ]))
    pages = SortedDict()
    for i in range(0, t.n_pages):
        # Compare ints with ``==``: ``is`` tests identity and only happens to
        # work for small cached integers.
        if i == 0:
            # first page
            page = indices[0:indices.index(dividers[0]) - 1]
            self.assertEqual(page, [
                0, 4, 8, 17, 20, 26, 34, 44, 56, 65, 70, 79, 84, 88, 93, 102,
                109, 124, 137, 142, 146, 155, 165, 171, 175, 179, 188, 192,
                203, 213, 226, 230, 235, 239, 249, 252, 256, 262
            ])
        elif i == t.n_pages - 1:
            # last page
            page = indices[indices.index(dividers[i - 1]) - 1:]
        else:
            page = indices[indices.index(dividers[i - 1]):
                           indices.index(dividers[i])]
        pages[i] = page
    # Drop dividers that are not real offsets; rebuild each list instead of
    # removing while iterating, which would skip elements.
    for key in pages:
        pages[key] = [item for item in pages[key] if item in t.offset]
    self.assertNotEqual(len(pages), 0)
    t.pagination = SortedDict()
    for i in range(t.n_pages):
        t.pagination[i] = ' '.join(t.offset[key] for key in pages[i])
    self.assertIn('pagination', t.__dict__)
    for i in range(t.n_pages):
        self.assertLess(len(t.pagination[i]), 280)
def _check_monotonicity(slist: SortedList, sort_dir: str, x: Union[float, int],
                        y: float) -> Tuple[SortedList, str]:
    """Insert ``(x, y)`` into *slist* and re-evaluate the sort direction.

    Once at least three points are stored, the new point and its neighbours
    (a window of three consecutive entries) are classified with
    ``_non_increasing`` / ``_non_decreasing`` as ``'unknown'``, ``'up'`` or
    ``'down'``.  If the window is neither, or the result contradicts a known
    *sort_dir*, a message is printed and ``breakpoint()`` is entered.

    Returns the (mutated) list and the possibly updated direction.
    """
    point = (x, y)
    slist.add(point)
    position = slist.index(point)
    size = len(slist)
    if size < 3:
        return slist, sort_dir

    # Window of three entries centred on the new point, shifted inwards when
    # the point sits at either end of the list.
    start = min(max(position - 1, 0), size - 3)
    window = slist[start:start + 3]

    falling = _non_increasing(window)
    rising = _non_decreasing(window)
    if rising and falling:
        updated_sort_dir = 'unknown'
    elif rising:
        updated_sort_dir = 'up'
    elif falling:
        updated_sort_dir = 'down'
    else:
        print('Binary iterator observed non-monotonic values. Entering debugging mode:')
        breakpoint()
    # noinspection PyUnboundLocalVariable
    if sort_dir != 'unknown' and sort_dir != updated_sort_dir:
        print('Binary iterator observed non-monotonic values. Entering debugging mode:')
        breakpoint()
    sort_dir = updated_sort_dir
    return slist, sort_dir
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """Tell whether two values at most *k* indices apart differ by at most *t*.

    A sorted window of the previous *k* values is maintained; each new value
    is compared with an equal value and with its immediate neighbours in the
    window.
    """
    if t < 0:
        return False
    window = SortedList()
    for i, value in enumerate(nums):
        if i > k:
            # The value that just slid out of range.
            window.remove(nums[i - k - 1])
        if value in window:
            return True
        window.add(value)
        pos = window.index(value)
        neighbours = []
        if pos > 0:
            neighbours.append(window[pos - 1])
        if pos < len(window) - 1:
            neighbours.append(window[pos + 1])
        if any(abs(value - other) <= t for other in neighbours):
            return True
    return False
def streamUpdate(stream, target):
    """Insert stream values into a sorted list, collapsing tight triples.

    After each insertion the three windows of three consecutive entries that
    contain the new value are examined in order: ``(i-2, i-1, i)``,
    ``(i-1, i, i+1)`` and ``(i, i+1, i+2)``.  The first window whose two
    adjacent gaps are both ``<= target`` has its three values removed.

    The final list is printed; nothing is returned.
    """
    sl = SortedList()
    for num in stream:
        sl.add(num)
        index = sl.index(num)
        # Each window is tested independently.  Previously the tests were
        # chained with ``elif``, so the later windows were never examined
        # once ``index >= 2`` even when the first window did not match.
        for start in (index - 2, index - 1, index):
            if start < 0 or start + 2 >= len(sl):
                continue
            num1, num2, num3 = sl[start], sl[start + 1], sl[start + 2]
            if num2 - num1 <= target and num3 - num2 <= target:
                sl.remove(num1)
                sl.remove(num2)
                sl.remove(num3)
                break
    print(sl)
class ColorTransferFunction(Data):
    """Sorted collection of ``ColorTransferFunctionPoint`` objects.

    ``point_added`` is emitted with the new point every time one is added.
    """

    point_added = Signal(ColorTransferFunctionPoint)

    def __init__(self, path: Path = None):
        super().__init__(path)
        # Kept ordered by the points' own comparison.
        self.points = SortedList()

    @classmethod
    def from_x_fractions_colors_array(
            cls, x_fractions_colors_array: np.ndarray,
            max_x: int = 255) -> ColorTransferFunction:
        """Build a transfer function from rows of ``[x_fraction, *color]``.

        The first element of each row is scaled by *max_x* to give the point's
        x; the remaining elements are passed on as the color array.
        """
        color_transfer_function = cls()
        for row in x_fractions_colors_array:
            color_transfer_function.add_point_from_x_color(
                row[0] * max_x, row[1:])
        return color_transfer_function

    @classmethod
    def default_jet(cls, max_x: int = 255) -> ColorTransferFunction:
        """Return a five-point preset (presumably a "jet" RGBA ramp)."""
        # Use ``cls`` so subclasses get an instance of their own type.
        return cls.from_x_fractions_colors_array(
            np.array([[0, 0, 0, 255, 255],
                      [0.25, 0, 255, 255, 255],
                      [0.5, 0, 255, 0, 255],
                      [0.75, 255, 255, 0, 255],
                      [1, 255, 0, 0, 255]]),
            max_x)

    def add_point(self, point: ColorTransferFunctionPoint):
        """Insert *point* and emit ``point_added``."""
        self.points.add(point)
        self.point_added.emit(point)

    def add_point_from_x_color(self, x: float, color_array: np.ndarray = None):
        """Create a point at *x* with *color_array* and add it.

        When *color_array* is omitted a fresh ``[255, 255, 255, 255]`` array is
        used.  It is created per call so that points never share (and cannot
        accidentally mutate) one default array.
        """
        if color_array is None:
            color_array = np.full((4, ), 255)
        self.add_point(ColorTransferFunctionPoint(x, color_array))

    def point_before(
            self,
            point: ColorTransferFunctionPoint) -> ColorTransferFunctionPoint:
        """Return the point stored just before *point*.

        NOTE(review): for the first point the index becomes ``-1``, so the
        *last* point is returned -- confirm callers expect this wrap-around.
        Raises ``ValueError`` if *point* is not stored.
        """
        return self.points[self.points.index(point) - 1]

    def point_after(
            self,
            point: ColorTransferFunctionPoint) -> ColorTransferFunctionPoint:
        """Return the point stored just after *point*.

        Raises ``IndexError`` for the last point and ``ValueError`` if *point*
        is not stored.
        """
        return self.points[self.points.index(point) + 1]
def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
    """Tell whether two values at most *k* indices apart differ by at most *t*.

    Keeps the previous *k* values in a sorted window and compares every new
    value with the entries that end up directly beside it.
    """
    from sortedcontainers import SortedList
    window = SortedList()
    for i, val in enumerate(nums):
        if i > k:
            # Forget the value that is now more than k positions behind.
            window.remove(nums[i - k - 1])
        # Where val will land; its future neighbours are the entries on
        # either side of this slot.
        slot = window.bisect_left(val)
        has_close_left = slot >= 1 and abs(window[slot - 1] - val) <= t
        if has_close_left:
            window.add(val)
            return True
        has_close_right = slot < len(window) and abs(window[slot] - val) <= t
        window.add(val)
        if has_close_right:
            return True
    return False
def kEmptySlots(self, bulbs: List[int], k: int) -> int:
    """Return the first day (1-based) on which a newly lit bulb sits exactly
    ``k + 1`` positions from an adjacent lit bulb, or ``-1`` if that never
    happens.

    Infinite sentinels at both ends guarantee every bulb has two neighbours.
    """
    lit = SortedList([float('-inf'), float('inf')])
    wanted_gap = k + 1
    day = 0
    for bulb in bulbs:
        day += 1
        lit.add(bulb)
        at = lit.index(bulb)
        gap_left = lit[at] - lit[at - 1]
        gap_right = lit[at + 1] - lit[at]
        if gap_left == wanted_gap or gap_right == wanted_gap:
            return day
    return -1
class Solution:
    """Sliding-window "MK average" over the last *m* stream values.

    The average is taken over the window after discarding its *k* smallest
    and *k* largest values.  Running sums of the whole window and of both
    discarded groups are maintained so a query needs no iteration.
    """

    def __init__(self, m: int, k: int):
        from sortedcontainers import SortedList
        self.m, self.k = m, k
        self.queue = deque()       # window in arrival order
        self.sl = SortedList()     # window in sorted order
        # total: sum of the window; left_k / right_k: sums of the k smallest
        # and k largest values (only meaningful once the window is full).
        self.total = self.left_k = self.right_k = 0

    def addElement(self, num: int) -> None:
        """Append *num*, evicting the oldest value when the window is full."""
        size = len(self.sl)
        if size >= self.m:
            # Window already full: adjust the group sums for the insertion
            # before the list changes.
            index = self.sl.bisect_left(num)
            if index < self.k:
                self.left_k += num
                self.left_k -= self.sl[self.k - 1]
            if index > size - self.k:
                self.right_k += num
                self.right_k -= self.sl[size - self.k]

        self.sl.add(num)
        self.queue.append(num)
        self.total += num

        if size == self.m - 1:
            # The window has just become full: initialise the group sums.
            self.left_k = sum(self.sl[:self.k])
            self.right_k = sum(self.sl[-self.k:])

        if len(self.sl) > self.m:
            oldest = self.queue.popleft()
            index = self.sl.index(oldest)
            if index < self.k:
                self.left_k -= oldest
                self.left_k += self.sl[self.k]
            if index > self.m - self.k:
                self.right_k -= oldest
                self.right_k += self.sl[self.m - self.k]
            self.total -= oldest
            self.sl.remove(oldest)

    def calculateMKAverage(self) -> int:
        """Return the floored MK average, or ``-1`` until *m* values exist."""
        if len(self.sl) < self.m:
            return -1
        return (self.total - self.left_k - self.right_k) // (self.m - 2 * self.k)
def test_index():
    """Check ``SortedList.index`` on distinct values and on all-equal values,
    including start/stop bounds that are negative or past the end."""
    size = 100
    distinct = SortedList(range(size), load=17)
    for expected in range(size):
        assert expected == distinct.index(expected)
    # A stop bound beyond the end of the list is accepted.
    assert distinct.index(99, 0, 1000) == 99

    repeated = SortedList((0 for rpt in range(size)), load=17)
    for lo in range(size):
        for hi in range(lo, size):
            assert repeated.index(0, lo, hi + 1) == lo
    # Negative start bounds count from the end.
    for lo in range(size):
        assert repeated.index(0, -(size - lo)) == lo
    assert repeated.index(0, -1000) == 0
def test_index():
    """Verify ``index`` for unique values, then for a list of equal values
    searched with every start/stop window and with negative starts."""
    count = 100
    unique_values = SortedList(range(count), load=17)
    for value in range(count):
        assert unique_values.index(value) == value
    assert unique_values.index(99, 0, 1000) == 99

    zeros = SortedList((0 for rpt in range(count)), load=17)
    windows = ((start, stop) for start in range(count)
               for stop in range(start, count))
    for start, stop in windows:
        # The first match inside the window is always its left edge.
        assert zeros.index(0, start, stop + 1) == start
    for start in range(count):
        negative_start = -(count - start)
        assert zeros.index(0, negative_start) == start
    assert zeros.index(0, -1000) == 0
class ZSet:
    """Set of members ordered by score.

    ``mem2score`` maps each member to its score; ``scores`` holds the matching
    ``(score, member)`` tuples in sorted order.
    """

    def __init__(self):
        self.mem2score = {}
        self.scores = SortedList()

    def __contains__(self, val):
        return val in self.mem2score

    def __setitem__(self, val, score):
        self.add(val, score)

    def __getitem__(self, key):
        """Return the score of *key*; raises ``KeyError`` if absent."""
        return self.mem2score[key]

    def __len__(self):
        return len(self.mem2score)

    def __iter__(self):
        """Iterate over members in ``(score, member)`` order."""
        return (val for score, val in self.scores)

    def __str__(self):
        # Must return a string: str() raises TypeError on None.
        pairs = [(val, score) for score, val in self.scores]
        return '{}({!r})'.format(type(self).__name__, pairs)

    def get(self, key, default=None):
        """Return the score of *key*, or *default* if absent."""
        return self.mem2score.get(key, default)

    def add(self, val, score):
        """Insert *val* with *score*, or update its score.

        Returns ``False`` when *val* already has exactly this score and
        ``True`` otherwise.
        """
        # Test membership, not truthiness: a previous score of 0 is a real
        # score whose (0, val) entry has to be removed before re-adding.
        if val in self.mem2score:
            s_prev = self.mem2score[val]
            if s_prev == score:
                return False
            self.scores.remove((s_prev, val))
        self.mem2score[val] = score
        self.scores.add((score, val))
        return True

    def discard(self, key):
        """Remove *key* if present; do nothing otherwise."""
        try:
            score = self.mem2score.pop(key)
        except KeyError:
            return
        self.scores.remove((score, key))

    def items(self):
        """Return the ``(member, score)`` view of the underlying dict."""
        return self.mem2score.items()

    def rank(self, member):
        """Return the position of *member* in score order.

        Raises ``KeyError`` if *member* is absent.
        """
        return self.scores.index((self.mem2score[member], member))

    def islice_score(self, start, stop, reverse=False):
        """Delegate to ``self.scores.islice(start, stop, reverse)``.

        NOTE(review): despite the name, the bounds are forwarded unchanged to
        ``islice`` -- confirm whether they are meant as indices or scores.
        """
        return self.scores.islice(start, stop, reverse)
class MKAverage:
    """Average of the last *m* values after dropping the *k* smallest and the
    *k* largest, maintained incrementally."""

    def __init__(self, m: int, k: int):
        self.m = m
        self.k = k
        self.deque = collections.deque()   # window in arrival order
        self.sortedNums = SortedList()     # window in sorted order
        self.total = 0                     # sum of the window
        self.first_k = 0                   # sum of the k smallest
        self.last_k = 0                    # sum of the k largest

    def addElement(self, num: int) -> None:
        """Add *num* and evict the oldest value if the window exceeds *m*."""
        self.total += num
        self.deque.append(num)

        ordered, k = self.sortedNums, self.k
        size = len(ordered)
        slot = ordered.bisect_left(num)

        # num joins the k smallest: the current k-th smallest (if there is
        # one) is pushed out of that group.
        if slot < k:
            self.first_k += num
            if size >= k:
                self.first_k -= ordered[k - 1]
        # num joins the k largest: the current k-th largest is pushed out.
        if slot > size - k:
            self.last_k += num
            if size >= k:
                self.last_k -= ordered[-k]
        ordered.add(num)

        if len(self.deque) <= self.m:
            return

        # Evict the oldest value; the element bordering the affected group
        # moves in to replace it.
        oldest = self.deque.popleft()
        self.total -= oldest
        slot = ordered.index(oldest)
        if slot < k:
            self.first_k -= oldest
            self.first_k += ordered[k]
        elif slot >= len(ordered) - k:
            self.last_k -= oldest
            self.last_k += ordered[-(k + 1)]
        ordered.remove(oldest)

    def calculateMKAverage(self) -> int:
        """Return the floored MK average, or ``-1`` with fewer than *m* values."""
        if len(self.sortedNums) < self.m:
            return -1
        middle_sum = self.total - self.first_k - self.last_k
        return middle_sum // (self.m - 2 * self.k)
def collate_fn(self, interaction):
    """Re-index the user/item ids of a batch and collate the node blocks.

    Item ids are shifted by ``self.num_users``.  The distinct ids of the batch
    are sorted and handed to ``self.collator.collate``; users, positive items
    and negative items are returned as positions within that sorted id list.

    Returns ``(users, pos, neg, blocks)`` where the first three are int64
    tensors and ``blocks`` is the third element returned by the collator.
    ``neg`` is empty when the negative-item field cannot be read.
    """
    users_orig = interaction['user_id']
    pos_orig = interaction[self.ITEM_ID] + self.num_users
    try:
        neg_orig = interaction[self.NEG_ITEM_ID] + self.num_users
    except Exception:
        # Best effort: batches without negative samples yield an empty tensor.
        neg_orig = th.zeros((0, ))

    user_ids = users_orig.tolist()
    pos_ids = pos_orig.tolist()
    neg_ids = neg_orig.tolist()

    unique = sorted(set(user_ids + pos_ids + neg_ids))
    position = {node: i for i, node in enumerate(unique)}

    # Build int64 tensors directly: th.Tensor(...) creates float32, which
    # cannot represent ids above 2**24 exactly and would corrupt them before
    # the .long() conversion.
    all_nodes = th.tensor(unique, dtype=th.long)
    users = th.tensor([position[x] for x in user_ids], dtype=th.long)
    pos = th.tensor([position[x] for x in pos_ids], dtype=th.long)
    neg = th.tensor([position[x] for x in neg_ids], dtype=th.long)

    collations = self.collator.collate(all_nodes)
    _, _, blocks = collations
    return users, pos, neg, blocks
def fast_generator(rotors: list):
    """Yield intersection pairs found by a sweep over the given rotors.

    For every rotor center two ``SemiCircle`` objects (left and right side)
    are created and two event points are queued: an ``upper`` event one unit
    above the center and a ``bottom`` event one unit below it.  Events are
    then processed in heap order while ``status_array`` tracks the currently
    active semicircles; ``refine_intersections`` is called after each event
    and is expected to fill ``intersection_pairs`` (and may push further
    events -- presumably the ``intersection`` events; verify against its
    definition).

    :param rotors: sequence of rotor centers, each indexable as ``(x, y)``
    :return: generator over the collected intersection pairs
    """
    from sortedcontainers import SortedList
    intersection_pairs = set()
    intersections_set = set()
    # Semicircles currently crossed by the sweep line, in their own sort order.
    status_array = SortedList()
    # Heap of pending events, ordered by EventPoint's comparison.
    event_points: list = []
    for rotor_center in rotors:
        # Both events of a rotor share the same pair of semicircle objects.
        affiliations: list = [
            SemiCircle(circle_center=rotor_center, side=side)
            for side in [SemiCircleSide.left, SemiCircleSide.right]
        ]
        heapq.heappush(
            event_points,
            EventPoint(coordinates=(rotor_center[0], rotor_center[1] + 1),
                       affiliations=affiliations,
                       event_type=EventPointType.upper))
        heapq.heappush(
            event_points,
            EventPoint(coordinates=(rotor_center[0], rotor_center[1] - 1),
                       affiliations=affiliations,
                       event_type=EventPointType.bottom))
    while event_points:
        next_event_point: EventPoint = heapq.heappop(event_points)
        # Publish the sweep position through the shared module.
        # NOTE(review): presumably SemiCircle comparisons read this value --
        # confirm.
        shared.sweep_line_progress = next_event_point.coordinates[1]
        if next_event_point.event_type == EventPointType.upper:
            status_array.update(next_event_point.affiliations)
        if next_event_point.event_type == EventPointType.intersection:
            # Remove and re-insert both semicircles so they are re-sorted
            # after the sweep position has changed.
            status_array.discard(next_event_point.affiliations[0])
            status_array.discard(next_event_point.affiliations[1])
            status_array.update(next_event_point.affiliations)
        # Position of the smaller of the event's two semicircles; for bottom
        # events it is taken before they are removed below.
        left_semi_circle_position: int = status_array.index(
            min(next_event_point.affiliations))
        # assert max(next_event_point.affiliations) == status_array[left_semi_circle_position + (-1) ** (next_event_point.event_type == EventPointType.intersection)]
        if next_event_point.event_type == EventPointType.bottom:
            status_array.discard(next_event_point.affiliations[0])
            status_array.discard(next_event_point.affiliations[1])
        refine_intersections(
            intersection_pairs=intersection_pairs,
            intersections_set=intersections_set,
            status_array=status_array,
            event_points=event_points,
            left_semi_circle_position=left_semi_circle_position,
            deletion=next_event_point.event_type == EventPointType.bottom)
    for pair in intersection_pairs:
        yield pair
class MKAverage(object):
    """Average of the last *m* values excluding the *k* smallest and the *k*
    largest, kept up to date with running sums."""

    def __init__(self, m, k):
        """
        :type m: int
        :type k: int
        """
        self.__m = m
        self.__k = k
        self.__dq = collections.deque()   # window in arrival order
        self.__sl = SortedList()          # window in sorted order
        self.__total = 0                  # sum of the window
        self.__first_k = 0                # sum of the k smallest
        self.__last_k = 0                 # sum of the k largest

    def addElement(self, num):
        """
        :type num: int
        :rtype: None
        """
        window = self.__dq
        # Make room first so the sorted list never exceeds m entries.
        if len(window) == self.__m:
            self.__remove(window.popleft())
        window.append(num)
        self.__add(num)

    def calculateMKAverage(self):
        """
        :rtype: int
        """
        if len(self.__sl) < self.__m:
            return -1
        kept_sum = self.__total - self.__first_k - self.__last_k
        kept_count = self.__m - 2 * self.__k
        return kept_sum // kept_count

    def __add(self, num):
        """Insert *num* and update the running sums."""
        ordered, k = self.__sl, self.__k
        self.__total += num
        size = len(ordered)
        slot = ordered.bisect_left(num)
        group_full = size >= k
        if slot < k:
            # num enters the k smallest; the old k-th smallest leaves.
            self.__first_k += num
            if group_full:
                self.__first_k -= ordered[k - 1]
        if slot > size - k:
            # num enters the k largest; the old k-th largest leaves.
            self.__last_k += num
            if group_full:
                self.__last_k -= ordered[-k]
        ordered.add(num)

    def __remove(self, num):
        """Delete one occurrence of *num* and update the running sums."""
        ordered, k = self.__sl, self.__k
        self.__total -= num
        slot = ordered.index(num)
        if slot < k:
            self.__first_k -= num
            self.__first_k += ordered[k]
        elif slot > (len(ordered) - 1) - k:
            self.__last_k -= num
            self.__last_k += ordered[-1 - k]
        ordered.remove(num)
def to_vector(self, ordering: SortedList) -> State:
    """
    Convert the Side to a State according to the given ordering.

    Each agent's count (from ``self.to_counter()``) is written at the
    position that agent occupies in ``ordering``; all other entries are zero.

    :param ordering: sequence of complex agents
    :return: State representing vector
    """
    counts_vector = np.zeros(len(ordering), dtype=int)
    agent_counts = self.to_counter()
    for agent in list(agent_counts):
        slot = ordering.index(agent)
        counts_vector[slot] = agent_counts[agent]
    return State(counts_vector)
def numTeams(self, rating: List[int]) -> int:
    """Count index triples ``i < j < k`` whose ratings are strictly
    increasing or strictly decreasing.

    For every middle element the number of smaller ratings to its left and of
    larger ratings to its right is obtained from a sorted list; the remaining
    neighbours give the decreasing combinations.
    """
    n = len(rating)

    # smaller[i]: ratings before i that sort below rating[i].
    seen = SortedList()
    smaller = []
    for value in rating:
        seen.add(value)
        smaller.append(seen.index(value))

    # larger[i]: ratings after i that sort above rating[i].
    seen = SortedList()
    larger = [0] * n
    for i in range(n - 1, -1, -1):
        seen.add(rating[i])
        larger[i] = len(seen) - seen.index(rating[i]) - 1

    return sum(
        below * above + (i - below) * (n - i - 1 - above)
        for i, (below, above) in enumerate(zip(smaller, larger))
    )
def processQueries3(self, queries: List[int], m: int) -> List[int]:
    """Answer move-to-front queries on the permutation ``1..m``.

    Each value owns a slot number; slots are kept in a sorted list so the
    rank of a slot is the value's current position.  A queried value is moved
    to the front by giving it a fresh slot smaller than every existing one.
    """
    slot_of = {value: value - 1 for value in range(1, m + 1)}
    occupied = SortedList(range(m))
    answer = []
    next_front = -1
    for value in queries:
        slot = slot_of[value]
        answer.append(occupied.index(slot))
        occupied.remove(slot)
        occupied.add(next_front)
        slot_of[value] = next_front
        next_front -= 1
    return answer
def update_entry(cls, book: SortedList, entry: OrderBookEntry):
    """Apply *entry* to *book*.

    * amount equal to zero -> the entry is removed (silently ignored when it
      is not in the book);
    * entry already in the book -> its stored amount is overwritten;
    * otherwise -> the entry is inserted.
    """
    is_removal = Decimal(entry.amount) == Decimal('0')
    if is_removal:
        try:
            book.remove(entry)
        except ValueError:
            # Nothing stored for this entry; nothing to remove.
            pass
        return

    try:
        position = book.index(entry)
    except ValueError:
        # Not in the book yet: insert it.
        book.add(entry)
        return
    # Already present: only the amount changes.
    book[position].amount = entry.amount
class MySorted:
    """Thin wrapper around a ``SortedList`` stored in ``self.elements``."""

    def __init__(self):
        self.elements = SortedList()

    def index(self, val):
        """Return a position for *val*.

        * ``0`` when the container is empty or *val* is not above the
          smallest element;
        * ``len(self)`` when *val* is not below the largest element;
        * otherwise the index of *val* in the list.

        Raises ``ValueError`` when *val* lies strictly between the smallest
        and largest elements but is not stored.
        """
        if not self.elements or self.elements[0] >= val:
            return 0
        elif self.elements[-1] <= val:
            return len(self.elements)
        return self.elements.index(val)

    def append(self, val):
        """Insert *val* at its sorted position."""
        # SortedList.append() raises NotImplementedError in current
        # sortedcontainers releases; add() is the supported insertion method.
        self.elements.add(val)

    def __len__(self):
        return len(self.elements)
class MKAverage:
    """Sliding-window average over the last *m* values that ignores the *k*
    smallest and *k* largest of them."""

    def __init__(self, m: int, k: int):
        self.m = m
        self.k = k
        self.deque = collections.deque()   # arrival order
        self.sl = SortedList()             # sorted order
        self.total = 0                     # sum of the window
        self.first_k = 0                   # sum of the k smallest
        self.last_k = 0                    # sum of the k largest

    def addElement(self, num: int) -> None:
        """Record *num*; drop the oldest value once more than *m* are held."""
        self._insert(num)
        if len(self.deque) > self.m:
            self._evict_oldest()

    def _insert(self, num):
        """Add *num* to the window and adjust the three running sums."""
        self.total += num
        self.deque.append(num)
        size = len(self.sl)
        slot = self.sl.bisect_left(num)
        if slot < self.k:
            self.first_k += num
            if size >= self.k:
                # The previous k-th smallest no longer belongs to the group.
                self.first_k -= self.sl[self.k - 1]
        if slot >= size + 1 - self.k:
            self.last_k += num
            if size >= self.k:
                # The previous k-th largest no longer belongs to the group.
                self.last_k -= self.sl[-self.k]
        self.sl.add(num)

    def _evict_oldest(self):
        """Remove the oldest value and adjust the three running sums."""
        oldest = self.deque.popleft()
        self.total -= oldest
        slot = self.sl.index(oldest)
        if slot < self.k:
            self.first_k -= oldest
            self.first_k += self.sl[self.k]
        elif slot >= len(self.sl) - self.k:
            self.last_k -= oldest
            self.last_k += self.sl[-self.k - 1]
        self.sl.remove(oldest)

    def calculateMKAverage(self) -> int:
        """Return the floored MK average, or ``-1`` with fewer than *m* values."""
        if len(self.sl) < self.m:
            return -1
        remaining = self.total - self.first_k - self.last_k
        return remaining // (self.m - 2 * self.k)
def test_index_valueerror4():
    """Look up a value that is absent from an all-zero list.

    NOTE(review): the name suggests a ValueError is expected, but nothing in
    this function asserts it -- presumably a decorator or the runner does;
    confirm.
    """
    zeros = SortedList([0] * 10, load=4)
    zeros.index(1)
def test_index_valueerror2():
    """Search an all-zero list with a negative stop bound of ``-10``.

    NOTE(review): the name suggests a ValueError is expected, but nothing in
    this function asserts it -- presumably a decorator or the runner does;
    confirm.
    """
    zeros = SortedList([0] * 10, load=4)
    zeros.index(0, 0, -10)
def test_index_valueerror7():
    """``index`` must raise ValueError for a value that falls between the
    stored values (here 1 between the 0s and the 2s)."""
    values = [0] * 10 + [2] * 10
    sorted_values = SortedList(values)
    sorted_values._reset(4)
    with pytest.raises(ValueError):
        sorted_values.index(1, 0, 10)
def stress_index2(slt):
    """Check ``index(value, start)`` on a list made of heavy duplicates.

    The first three values of *slt* are repeated 200 times; searching for the
    value at position ``p`` starting from ``p`` must return ``p``.
    """
    head = list(slt)[:3]
    repeated = SortedList(head * 200)
    for position, value in enumerate(repeated):
        assert repeated.index(value, position) == position
def test_index_valueerror6():
    """``index`` must raise ValueError when the value exists only before the
    requested start position (3 is at index 3, search starts at 5)."""
    digits = SortedList(range(10))
    digits._reset(4)
    with pytest.raises(ValueError):
        digits.index(3, 5)
def test_index_valueerror3():
    """``index`` must raise ValueError when start (7) lies after stop (3)."""
    zeros = SortedList([0] * 10)
    zeros._reset(4)
    with pytest.raises(ValueError):
        zeros.index(0, 7, 3)
def test_index_valueerror3():
    """A search window whose start (7) exceeds its stop (3) must make
    ``index`` raise ValueError."""
    start, stop = 7, 3
    all_zero = SortedList([0] * 10)
    all_zero._reset(4)
    with pytest.raises(ValueError):
        all_zero.index(0, start, stop)
def stress_index2(slt):
    """Stress ``index`` with a start bound on a list of repeated values.

    Builds a new list from the first three values of *slt* repeated 200 times
    and asserts that every element is found at its own position when the
    search starts there.
    """
    seed = list(slt)[:3]
    duplicated = SortedList(seed * 200)
    for expected, item in enumerate(duplicated):
        found = duplicated.index(item, expected)
        assert found == expected
def test_index_valueerror5():
    """``index`` on an empty list must raise ValueError."""
    empty = SortedList()
    with pytest.raises(ValueError):
        empty.index(1)
class SweepLine(object):
    '''
    Vertical sweep line used by the Bentley-Ottmann algorithm.

    At any moment it holds a sorted list of the ComparableSegments that
    intersect the sweep line at its current position.

    If two segments s1 and s2 overlap, nothing can be assumed about their
    relative order in the list, because s1 < s2 and s1 > s2 are both false.

    Such a list would usually be backed by a balanced binary search tree.
    The original author chose the SortedList of Grant Jenks' SortedContainers
    module instead, stating that it offers O(log(N)) insertion, deletion and
    swapping and is faster in practice.
    '''

    def __init__(self):
        '''
        Initializes an empty sweep line.
        '''
        self.l = SortedList()

    def isEmpty(self):
        '''
        Returns true if and only if the sweep line is empty.
        '''
        return len(self.l) == 0

    def addSegment(self, seg):
        '''
        Adds seg to the sweep line.

        ComparableSegment.currentX is first set to seg.x1, so the insertion
        is ordered as of the segment's own x1.
        '''
        ComparableSegment.currentX = seg.x1
        self.l.add(seg)

    def removeSegment(self, seg):
        '''
        Removes seg from the sweep line.
        '''
        self.l.remove(seg)

    def belowSegments(self, seg):
        '''
        Returns a list containing:
        - the highest segment s_below of the sweep line such that
          s_below.isBelow(seg);
        - all the segments s stored before s_below for which
          s.isBelow(s_below) is false (i.e. which have the same
          y-coordinate at ComparableSegment.currentX and the same gradient).

        The list is empty when no stored segment is below seg.
        '''
        res = []
        # i = index of seg
        i = self.l.index(seg)
        # Passes segments which have same y-coordinate and gradient
        # to find s_below
        while i-1 >= 0:
            prev = self.l[i-1]
            i -= 1
            if prev.isBelow(seg):
                res.append(prev)
                break
        # Appends all the segments which have same y-coordinate and
        # gradient as s_below
        while i-1 >= 0:
            prev = self.l[i-1]
            if prev.isBelow(res[0]):
                break
            res.append(prev)
            i -= 1
        return res

    def aboveSegments(self, seg):
        '''
        Returns a list containing:
        - the lowest segment s_above of the sweep line such that
          seg.isBelow(s_above);
        - all the segments s stored after s_above for which
          s_above.isBelow(s) is false (i.e. which have the same
          y-coordinate at ComparableSegment.currentX and the same gradient).

        The list is empty when no stored segment is above seg.
        '''
        res = []
        # i = index of seg
        i = self.l.index(seg)
        # Passes segments which have same y-coordinate and gradient
        # to find s_above
        while i+1 < len(self.l):
            succ = self.l[i+1]
            i += 1
            if seg.isBelow(succ):
                res.append(succ)
                break
        # Appends all the segments which have same y-coordinate and
        # gradient as s_above
        while i+1 < len(self.l):
            succ = self.l[i+1]
            if res[0].isBelow(succ):
                break
            res.append(succ)
            i += 1
        return res

    def sameLevelAs(self, seg):
        '''
        Returns the stored entry found at seg's index followed by the
        neighbouring segments s for which neither seg.isBelow(s) (going up)
        nor s.isBelow(seg) (going down) holds, i.e. all the segments with the
        same y-coordinate at ComparableSegment.currentX and the same gradient
        as seg.
        '''
        i = self.l.index(seg)
        res = [self.l[i]]
        # Looks for same level segments above
        j = i + 1
        while j < len(self.l) and not seg.isBelow(self.l[j]):
            res.append(self.l[j])
            j += 1
        # Looks for same level segments below
        j = i - 1
        while j >= 0 and not self.l[j].isBelow(seg):
            res.append(self.l[j])
            j -= 1
        return res

    def betweenY(self, y_inf, y_sup, x):
        '''
        Returns a list of all the segments intersecting the sweep line
        between y-coordinates y_inf and y_sup (both included), at
        x-coordinate x.

        Sets ComparableSegment.currentX to x and scans the list linearly
        from its start.
        '''
        ComparableSegment.currentX = x
        res = []
        i = 0
        # Passes segments whose y-coordinate is < y_inf
        while i < len(self.l) and self.l[i].yAtX(x) < y_inf:
            i += 1
        while i < len(self.l) and self.l[i].yAtX(x) <= y_sup:
            res.append(self.l[i])
            i += 1
        return res

    def revertOrder(self, x, segments):
        '''
        Reverses the order of the given segments inside the sweep line, at
        x-coordinate x.

        NOTE(review): the swap assigns through SortedList.__setitem__, which
        recent sortedcontainers releases do not implement -- confirm the
        library version this code targets.
        '''
        indices = []
        for seg in segments:
            # Each segment is located using the ordering valid at its own x1.
            ComparableSegment.currentX = seg.x1
            indices.append(self.l.index(seg))
        # Update segments currentX so that the swap keep sort order
        ComparableSegment.currentX = x
        # Swaps the segments
        for i in range(floor(len(segments)/2)):
            i1 = indices[i]
            i2 = indices[-i-1]
            self.l[i1] = segments[-i-1]
            self.l[i2] = segments[i]
def test_index_valueerror5():
    """Call ``index`` on an empty list.

    NOTE(review): the name suggests a ValueError is expected, but nothing in
    this function asserts it -- presumably a decorator or the runner does;
    confirm.
    """
    empty = SortedList()
    empty.index(1)
def test_index_valueerror7():
    """Search for 1 in a list holding only 0s and 2s, within ``[0, 10)``.

    NOTE(review): the name suggests a ValueError is expected, but nothing in
    this function asserts it -- presumably a decorator or the runner does;
    confirm.
    """
    values = [0] * 10 + [2] * 10
    zeros_and_twos = SortedList(values, load=4)
    zeros_and_twos.index(1, 0, 10)
def test_index_valueerror6():
    """Search for 3 starting at position 5 in the list ``0..9``.

    NOTE(review): the name suggests a ValueError is expected, but nothing in
    this function asserts it -- presumably a decorator or the runner does;
    confirm.
    """
    digits = SortedList(range(10), load=4)
    digits.index(3, 5)
class PriorityDict(MutableMapping): """ A PriorityDict provides the same methods as a dict. Additionally, a PriorityDict efficiently maintains its keys in value sorted order. Consequently, the keys method will return the keys in value sorted order, the popitem method will remove the item with the highest value, etc. """ def __init__(self, *args, **kwargs): """ A PriorityDict provides the same methods as a dict. Additionally, a PriorityDict efficiently maintains its keys in value sorted order. Consequently, the keys method will return the keys in value sorted order, the popitem method will remove the item with the highest value, etc. If the first argument is the boolean value False, then it indicates that keys are not comparable. By default this setting is True and duplicate values are tie-breaked on the key. Using comparable keys improves the performance of the PriorityDict. An optional *iterable* argument provides an initial series of items to populate the PriorityDict. Each item in the sequence must itself contain two items. The first is used as a key in the new dictionary, and the second as the key's value. If a given key is seen more than once, the last value associated with it is retained in the new dictionary. If keyword arguments are given, the keywords themselves with their associated values are added as items to the dictionary. If a key is specified both in the positional argument and as a keyword argument, the value associated with the keyword is retained in the dictionary. For example, these all return a dictionary equal to ``{"one": 2, "two": 3}``: * ``SortedDict(one=2, two=3)`` * ``SortedDict({'one': 2, 'two': 3})`` * ``SortedDict(zip(('one', 'two'), (2, 3)))`` * ``SortedDict([['two', 3], ['one', 2]])`` The first example only works for keys that are valid Python identifiers; the others work with any valid keys. Note that this constructor mimics the Python dict constructor. 
If you're looking for a constructor like collections.Counter(...), see PriorityDict.count(...). """ self._dict = dict() if len(args) > 0 and isinstance(args[0], bool): if args[0]: self._list = SortedList() else: self._list = SortedListWithKey(key=lambda tup: tup[0]) else: self._list = SortedList() self.iloc = _IlocWrapper(self) self.update(*args, **kwargs) def clear(self): """Remove all elements from the dictionary.""" self._dict.clear() self._list.clear() def clean(self, value=0): """ Remove all items with value less than or equal to `value`. Default `value` is 0. """ _list, _dict = self._list, self._dict pos = self.bisect_right(value) for key in (key for value, key in _list[:pos]): del _dict[key] del _list[:pos] def __contains__(self, key): """Return True if and only if *key* is in the dictionary.""" return key in self._dict def __delitem__(self, key): """ Remove ``d[key]`` from *d*. Raises a KeyError if *key* is not in the dictionary. """ value = self._dict[key] self._list.remove((value, key)) del self._dict[key] def __getitem__(self, key): """ Return the priority of *key* in *d*. Raises a KeyError if *key* is not in the dictionary. """ return self._dict[key] def __iter__(self): """ Create an iterator over the keys of the dictionary ordered by the value sort order. """ return iter(key for value, key in self._list) def __reversed__(self): """ Create an iterator over the keys of the dictionary ordered by the reversed value sort order. 
""" return iter(key for value, key in reversed(self._list)) def __len__(self): """Return the number of (key, value) pairs in the dictionary.""" return len(self._dict) def __setitem__(self, key, value): """Set `d[key]` to *value*.""" if key in self._dict: old_value = self._dict[key] self._list.remove((old_value, key)) self._list.add((value, key)) self._dict[key] = value def copy(self): """Create a shallow copy of the dictionary.""" result = PriorityDict() result._dict = self._dict.copy() result._list = self._list.copy() result.iloc = _IlocWrapper(result) return result def __copy__(self): """Create a shallow copy of the dictionary.""" return self.copy() @classmethod def fromkeys(cls, iterable, value=0): """ Create a new dictionary with keys from `iterable` and values set to `value`. The default *value* is 0. """ return PriorityDict((key, value) for key in iterable) def get(self, key, default=None): """ Return the value for *key* if *key* is in the dictionary, else *default*. If *default* is not given, it defaults to ``None``, so that this method never raises a KeyError. """ return self._dict.get(key, default) def has_key(self, key): """Return True if and only in *key* is in the dictionary.""" return key in self._dict def pop(self, key, default=_NotGiven): """ If *key* is in the dictionary, remove it and return its value, else return *default*. If *default* is not given and *key* is not in the dictionary, a KeyError is raised. """ if key in self._dict: value = self._dict[key] self._list.remove((value, key)) return self._dict.pop(key) else: if default == _NotGiven: raise KeyError else: return default def popitem(self, index=-1): """ Remove and return item at *index* (default: -1). Raises IndexError if dict is empty or index is out of range. Negative indices are supported as for slice indices. """ value, key = self._list.pop(index) del self._dict[key] return key, value def setdefault(self, key, default=0): """ If *key* is in the dictionary, return its value. 
If not, insert *key* with a value of *default* and return *default*. *default* defaults to ``0``. """ if key in self._dict: return self._dict[key] else: self._dict[key] = default self._list.add((default, key)) return default def elements(self): """ Return an iterator over elements repeating each as many times as its count. Elements are returned in value sort-order. If an element’s count is less than one, elements() will ignore it. """ values = (repeat(key, value) for value, key in self._list) return chain.from_iterable(values) def most_common(self, count=None): """ Return a list of the `count` highest priority elements with their priority. If `count` is not specified, `most_common` returns *all* elements in the dict. Elements with equal counts are ordered by key. """ _list, _dict = self._list, self._dict if count is None: return [(key, value) for value, key in reversed(_list)] end = len(_dict) start = end - count return [(key, value) for value, key in reversed(_list[start:end])] def subtract(self, elements): """ Elements are subtracted from an iterable or from another mapping (or counter). Like dict.update() but subtracts counts instead of replacing them. Both inputs and outputs may be zero or negative. """ self -= Counter(elements) def tally(self, *args, **kwargs): """ Elements are counted from an iterable or added-in from another mapping (or counter). Like dict.update() but adds counts instead of replacing them. Also, the iterable is expected to be a sequence of elements, not a sequence of (key, value) pairs. """ self += Counter(*args, **kwargs) @classmethod def count(self, *args, **kwargs): """ Consume `args` and `kwargs` with a Counter and use that mapping to initialize a PriorityDict. """ return PriorityDict(Counter(*args, **kwargs)) def update(self, *args, **kwargs): """ Update the dictionary with the key/value pairs from *other*, overwriting existing keys. 
*update* accepts either another dictionary object or an iterable of key/value pairs (as a tuple or other iterable of length two). If keyword arguments are specified, the dictionary is then updated with those key/value pairs: ``d.update(red=1, blue=2)``. """ _list, _dict = self._list, self._dict if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping): items = args[0] else: items = dict(*args, **kwargs) if (10 * len(items)) > len(_dict): _dict.update(items) _list.clear() _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(items): old_value = _dict[key] _list.remove((old_value, key)) _dict[key] = value _list.add((value, key)) def index(self, key): """ Return the smallest *i* such that `d.iloc[i] == key`. Raises KeyError if *key* is not present. """ value = self._dict[key] return self._list.index((value, key)) def bisect_left(self, value): """ Similar to the ``bisect`` module in the standard library, this returns an appropriate index to insert *value* in PriorityDict. If *value* is already present in PriorityDict, the insertion point will be before (to the left of) any existing entries. """ return self._list.bisect_left((value,)) def bisect(self, value): """Same as bisect_left.""" return self._list.bisect((value,)) def bisect_right(self, value): """ Same as `bisect_left`, but if *value* is already present in PriorityDict, the insertion point will be after (to the right of) any existing entries. 
""" return self._list.bisect_right((value, _Biggest)) def __iadd__(self, that): """Add values from `that` mapping.""" _list, _dict = self._list, self._dict if len(_dict) == 0: _dict.update(that) _list.update((value, key) for key, value in iteritems(_dict)) elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: _dict[key] += value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value + value _dict[key] = value _list.add((value, key)) return self def __isub__(self, that): """Subtract values from `that` mapping.""" _list, _dict = self._list, self._dict if len(_dict) == 0: _dict.clear() _list.clear() elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: _dict[key] -= value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value - value _dict[key] = value _list.add((value, key)) return self def __ior__(self, that): """Or values from `that` mapping (max(v1, v2)).""" _list, _dict = self._list, self._dict if len(_dict) == 0: _dict.update(that) _list.update((value, key) for key, value in iteritems(_dict)) elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value > value else value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value if old_value > value else value _dict[key] = value _list.add((value, key)) return self def __iand__(self, that): """And values from `that` mapping (min(v1, v2)).""" _list, _dict = self._list, self._dict if 
len(_dict) == 0: _dict.clear() _list.clear() elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value < value else value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value if old_value < value else value _dict[key] = value _list.add((value, key)) return self def __add__(self, that): """Add values from this and `that` mapping.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: _dict[key] += value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) return result def __sub__(self, that): """Subtract values in `that` mapping from this.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: _dict[key] -= value _list.update((value, key) for key, value in iteritems(_dict)) return result def __or__(self, that): """Or values from this and `that` mapping.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value > value else value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) return result def __and__(self, that): """And values from this and `that` mapping.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value < value else value _list.update((value, key) for key, value in iteritems(_dict)) return result def __eq__(self, that): """Compare two mappings for equality.""" if isinstance(that, 
PriorityDict): that = that._dict return self._dict == that def __ne__(self, that): """Compare two mappings for inequality.""" if isinstance(that, PriorityDict): that = that._dict return self._dict != that def __lt__(self, that): """Compare two mappings for less than.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (_dict != that and self <= that) def __le__(self, that): """Compare two mappings for less than equal.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (len(_dict) <= len(that) and all(_dict[key] <= that[key] if key in that else False for key in _dict)) def __gt__(self, that): """Compare two mappings for greater than.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (_dict != that and self >= that) def __ge__(self, that): """Compare two mappings for greater than equal.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (len(_dict) >= len(that) and all(_dict[key] >= that[key] if key in _dict else False for key in that)) def isdisjoint(self, that): """ Return True if no key in `self` is also in `that`. This doesn't check that the value is greater than zero. To remove keys with value less than or equal to zero see *clean*. """ return not any(key in self for key in that) def items(self): """ Return a list of the dictionary's items (``(key, value)`` pairs). Items are ordered by their value from least to greatest. """ return list((key, value) for value, key in self._list) def iteritems(self): """ Return an iterable over the items (``(key, value)`` pairs) of the dictionary. Items are ordered by their value from least to greatest. """ return iter((key, value) for value, key in self._list) @not26 def viewitems(self): """ In Python 2.7 and later, return a new `ItemsView` of the dictionary's items. Beware iterating the `ItemsView` as items are unordered. In Python 2.6, raise a NotImplementedError. 
""" if hexversion < 0x03000000: return self._dict.viewitems() else: return self._dict.items() def keys(self): """ Return a list of the dictionary's keys. Keys are ordered by their corresponding value from least to greatest. """ return list(key for value, key in self._list) def iterkeys(self): """ Return an iterable over the keys of the dictionary. Keys are ordered by their corresponding value from least to greatest. """ return iter(key for value, key in self._list) @not26 def viewkeys(self): """ In Python 2.7 and later, return a new `KeysView` of the dictionary's keys. Beware iterating the `KeysView` as keys are unordered. In Python 2.6, raise a NotImplementedError. """ if hexversion < 0x03000000: return self._dict.viewkeys() else: return self._dict.keys() def values(self): """ Return a list of the dictionary's values. Values are ordered from least to greatest. """ return list(value for value, key in self._list) def itervalues(self): """ Return an iterable over the values of the dictionary. Values are iterated from least to greatest. """ return iter(value for value, key in self._list) @not26 def viewvalues(self): """ In Python 2.7 and later, return a `ValuesView` of the dictionary's values. Beware iterating the `ValuesView` as values are unordered. In Python 2.6, raise a NotImplementedError. """ if hexversion < 0x03000000: return self._dict.viewvalues() else: return self._dict.values() def __repr__(self): """Return a string representation of PriorityDict.""" return 'PriorityDict({0})'.format(repr(dict(self))) def _check(self): self._list._check() assert len(self._dict) == len(self._list) assert all(key in self._dict and self._dict[key] == value for value, key in self._list)
def test_index_valueerror5():
    """``index`` on an empty SortedList must raise ValueError."""
    empty_list = SortedList()

    with pytest.raises(ValueError):
        empty_list.index(1)
def prev_workspace_for_current_output_num():
    """Return the workspace number that precedes the current one.

    The numbers of the workspaces returned by
    ``workspaces_for_current_output()`` are sorted; the number just before
    the current workspace's number is returned, wrapping around to the
    largest number when the current workspace is the first one.
    """
    current, on_output = workspaces_for_current_output()
    numbers = SortedList(ws["num"] for ws in on_output)

    position = numbers.index(current["num"])
    # Modulo makes position 0 wrap around to the last entry.
    previous_position = (position - 1) % len(numbers)
    return numbers[previous_position]
def test_index_valueerror7():
    """``index`` raises ValueError for a value lying between stored values.

    The list holds ten 0s followed by ten 2s, so 1 is absent from the
    searched range [0, 10).
    """
    zeros_then_twos = [0] * 10 + [2] * 10
    slt = SortedList(zeros_then_twos)
    # Private helper of SortedList; presumably lowers the internal load so
    # the values span several sublists -- confirm against sortedcontainers.
    slt._reset(4)

    with pytest.raises(ValueError):
        slt.index(1, 0, 10)
class UrlClusterizedQueue:
    """URL queue that spreads returned URLs across feature clusters.

    While fewer than ``_min_urls_count`` URLs are known, URLs are handed out
    in insertion order.  Afterwards the known URLs are clustered on binary
    feature vectors and a small sub-queue is refilled by repeatedly taking an
    unused URL from the cluster that has the fewest already-used URLs.
    """

    def __init__(self):
        # sorted list of pairs (feature_count, feature_name)
        self._features_count_list = SortedList()
        # feature_name -> feature_count (mirror of the list above)
        self._features_count_dict = dict()
        self._clusterizer = DBSCAN(metric='jaccard')
        # only features seen in [min_freq, max_freq] of the URLs are used
        self._min_freq = 0.1
        self._max_freq = 0.9
        # url -> [features, is_used]
        self._urls = dict()
        # urls in insertion order
        self._urls_keys = []
        # position of the last URL returned by the insertion-order fallback
        self._index = -1
        self._min_urls_count = 50
        self._subqueue = Queue()
        # maximum number of URLs queued by one clustering run
        self._subqueue_len = 4

        # constants: indices into the ``[features, is_used]`` url record
        self._i_is_used = 1
        self._i_features = 0
        # constants: indices into the per-cluster ``[unused, used, total]``
        self._i_list_for_unused = 0
        self._i_list_for_used = 1
        self._i_list_total = 2

    def _return_url(self, url):
        """Mark *url* as used and return it."""
        self._urls[url][self._i_is_used] = True
        return url

    def _next_queue_fallback(self):
        """Return the next URL in insertion order and mark it as used.

        Raises
        ------
        IndexError
            If every inserted URL has already been returned this way.
        """
        self._index += 1
        url = self._urls_keys[self._index]
        return self._return_url(url)

    def _run_clustering(self):
        """Cluster the known URLs and refill the sub-queue.

        Returns the fallback URL when there are not enough features to
        cluster on (that URL is also put on the sub-queue), ``None``
        otherwise.
        """
        print("try to use clustering")

        # NOTE: this runs in a worker thread while ``put`` may run on the
        # event loop, so work on a snapshot of the key list to keep the
        # number of matrix rows consistent for the whole run.
        urls_keys = list(self._urls_keys)
        urls_count = len(urls_keys)

        # first of all we need to choose features whose count lies in
        # [min_feature_count, max_feature_count]
        max_feature_count = int(self._max_freq * urls_count)
        min_feature_count = int(self._min_freq * urls_count)
        # 1-tuples sort before every (count, name) pair with the same count,
        # so no sentinel feature name is needed for either bound.
        start_index = bisect.bisect_left(self._features_count_list,
                                         (min_feature_count,))
        end_index = bisect.bisect_left(self._features_count_list,
                                       (max_feature_count + 1,))
        if start_index >= end_index:
            print("not enough features")
            # Hand the fallback URL over through the sub-queue; otherwise
            # the caller would find the sub-queue empty.
            url = self._next_queue_fallback()
            self._subqueue.put(url)
            return url

        chosen_features = SortedSet(
            fname
            for _, fname in self._features_count_list[start_index:end_index]
        )

        # then we need to build the binary (url x feature) matrix
        X = np.zeros((urls_count, len(chosen_features)))
        for i, url in enumerate(urls_keys):
            features = self._urls[url][self._i_features]
            for j, fname in enumerate(chosen_features):
                if fname in features:
                    X[i][j] = 1

        # now we can run clustering
        y = self._clusterizer.fit_predict(X)

        # and we need to create a uniformly distributed queue
        def get_list_of_2_sets():
            # index 0: unused urls, index 1: used urls, index 2: total count
            return [set(), set(), 0]

        url_in_cluster = defaultdict(get_list_of_2_sets)
        for url, label in zip(urls_keys, y):
            cluster = url_in_cluster[label]
            if self._urls[url][self._i_is_used]:
                cluster[self._i_list_for_used].add(url)
            else:
                cluster[self._i_list_for_unused].add(url)
            cluster[self._i_list_total] += 1

        limit = self._subqueue_len
        # Sorted by descending number of used urls, so ``pop()`` (last item)
        # yields the cluster with the fewest used urls.
        cluster_keys = SortedKeyList(
            url_in_cluster.keys(),
            key=lambda x: -len(url_in_cluster[x][self._i_list_for_used]))
        while limit > 0 and len(cluster_keys) > 0:  # Todo: optimize
            less_index = cluster_keys.pop()
            unused_urls = url_in_cluster[less_index][self._i_list_for_unused]
            if len(unused_urls) > 0:
                url = unused_urls.pop()
                self._subqueue.put(url)
                limit -= 1
                if len(unused_urls) > 0:
                    # The cluster still has unused urls: record the one just
                    # taken as used and re-insert the cluster so it is
                    # re-ranked with its new used count.
                    url_in_cluster[less_index][self._i_list_for_used].add(url)
                    cluster_keys.add(less_index)
        return None

    async def _run_and_wait_clustering(self):
        """Run the clustering in a worker thread and wait for it to finish.

        Exceptions raised by the clustering propagate to the awaiting caller.
        """
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._run_clustering)

    async def get(self):
        """Return the next URL and mark it as used.

        Raises
        ------
        IndexError
            If the insertion-order fallback has run out of URLs.

        An empty sub-queue after clustering makes ``get_nowait()`` raise
        instead of blocking the event loop (``queue.Empty``, assuming
        ``Queue`` is the standard ``queue.Queue`` -- confirm).
        """
        if len(self._urls) < self._min_urls_count:
            return self._next_queue_fallback()

        if self._subqueue.empty():
            await self._run_and_wait_clustering()
        return self._return_url(self._subqueue.get_nowait())

    async def empty(self):
        """Return True when the insertion-order fallback has no URL left."""
        return self._index + 1 >= len(self._urls)

    async def put(self, url):
        """Register *url* and update the feature counts; known URLs are ignored."""
        if url in self._urls:
            return

        features = url_features.extract(url)
        for fname in features:
            if fname in self._features_count_dict:
                fcount = self._features_count_dict[fname]
                # drop the stale (count, name) pair before re-adding it
                self._features_count_list.remove((fcount, fname))
            else:
                fcount = 0
            fcount += 1
            self._features_count_dict[fname] = fcount
            self._features_count_list.add((fcount, fname))

        self._urls[url] = [features, False]  # False is for used
        self._urls_keys.append(url)
class Timeline:
    """
    Ordered set of segments.

    A timeline can be seen as an ordered set of non-empty segments (Segment).
    Segments can overlap -- though adding an already existing segment to a
    timeline does nothing.

    Parameters
    ----------
    segments : Segment iterator, optional
        initial set of (non-empty) segments
    uri : string, optional
        name of segmented resource

    Returns
    -------
    timeline : Timeline
        New timeline
    """

    @classmethod
    def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None) -> 'Timeline':
        """Build a timeline from the ``PYANNOTE_SEGMENT`` column of `df`."""
        segments = list(df[PYANNOTE_SEGMENT])
        timeline = cls(segments=segments, uri=uri)
        return timeline

    def __init__(self,
                 segments: Optional[Iterable[Segment]] = None,
                 uri: Optional[str] = None):
        if segments is None:
            segments = ()

        # set of segments (used for checking inclusion)
        segments_set = set(segments)

        if any(not segment for segment in segments_set):
            raise ValueError('Segments must not be empty.')

        self.segments_set_ = segments_set

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary
                      for segment in segments_set
                      for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        # path to (or any identifier of) segmented resource
        self.uri: Optional[str] = uri

    def __len__(self):
        """Number of segments

        >>> len(timeline)  # timeline contains three segments
        3
        """
        return len(self.segments_set_)

    def __nonzero__(self):
        return self.__bool__()

    def __bool__(self):
        """Emptiness

        >>> if timeline:
        ...    # timeline is not empty
        ... else:
        ...    # timeline is empty
        """
        return len(self.segments_set_) > 0

    def __iter__(self) -> Iterator[Segment]:
        """Iterate over segments (in chronological order)

        >>> for segment in timeline:
        ...     # do something with the segment

        See also
        --------
        :class:`pyannote.core.Segment` describes how segments are sorted.
        """
        return iter(self.segments_list_)

    def __getitem__(self, k: int) -> Segment:
        """Get segment by index (in chronological order)

        >>> first_segment = timeline[0]
        >>> penultimate_segment = timeline[-2]
        """
        return self.segments_list_[k]

    def __eq__(self, other: 'Timeline'):
        """Equality

        Two timelines are equal if and only if their segments are equal.
        Comparison with anything that is not a timeline is left to the other
        operand (``NotImplemented``).

        >>> timeline1 = Timeline([Segment(0, 1), Segment(2, 3)])
        >>> timeline2 = Timeline([Segment(2, 3), Segment(0, 1)])
        >>> timeline3 = Timeline([Segment(2, 3)])
        >>> timeline1 == timeline2
        True
        >>> timeline1 == timeline3
        False
        """
        if not isinstance(other, Timeline):
            return NotImplemented
        return self.segments_set_ == other.segments_set_

    def __ne__(self, other: 'Timeline'):
        """Inequality"""
        if not isinstance(other, Timeline):
            return NotImplemented
        return self.segments_set_ != other.segments_set_

    def index(self, segment: Segment) -> int:
        """Get index of (existing) segment

        Parameters
        ----------
        segment : Segment
            Segment that is being looked for.

        Returns
        -------
        position : int
            Index of `segment` in timeline

        Raises
        ------
        ValueError if `segment` is not present.
        """
        return self.segments_list_.index(segment)

    def add(self, segment: Segment) -> 'Timeline':
        """Add a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being added

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline already contains this segment, it will not be added
        again, as a timeline is meant to be a **set** of segments (not a list).

        If the segment is empty, it will not be added either, as a timeline
        only contains non-empty segments.
        """
        segments_set_ = self.segments_set_
        if segment in segments_set_ or not segment:
            return self

        segments_set_.add(segment)
        self.segments_list_.add(segment)

        segments_boundaries_ = self.segments_boundaries_
        segments_boundaries_.add(segment.start)
        segments_boundaries_.add(segment.end)

        return self

    def remove(self, segment: Segment) -> 'Timeline':
        """Remove a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being removed

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline does not contain this segment, this does nothing
        """
        segments_set_ = self.segments_set_
        if segment not in segments_set_:
            return self

        segments_set_.remove(segment)
        self.segments_list_.remove(segment)

        segments_boundaries_ = self.segments_boundaries_
        segments_boundaries_.remove(segment.start)
        segments_boundaries_.remove(segment.end)

        return self

    def discard(self, segment: Segment) -> 'Timeline':
        """Same as `remove`

        See also
        --------
        :func:`pyannote.core.Timeline.remove`
        """
        return self.remove(segment)

    def __ior__(self, timeline: 'Timeline') -> 'Timeline':
        return self.update(timeline)

    def update(self, timeline: 'Timeline') -> 'Timeline':
        """Add every segments of an existing timeline (in place)

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        self : Timeline
            Updated timeline

        Note
        ----
        Only segments that do not already exist will be added, as a timeline is
        meant to be a **set** of segments (not a list).
        """
        segments_set = self.segments_set_
        segments_set |= timeline.segments_set_

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary
                      for segment in segments_set
                      for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        return self

    def __or__(self, timeline: 'Timeline') -> 'Timeline':
        return self.union(timeline)

    def union(self, timeline: 'Timeline') -> 'Timeline':
        """Create new timeline made of union of segments

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        union : Timeline
            New timeline containing the union of both timelines.

        Note
        ----
        This does the same as timeline.update(...) except it returns a new
        timeline, and the original one is not modified.
        """
        segments = self.segments_set_ | timeline.segments_set_
        return Timeline(segments=segments, uri=self.uri)

    def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]:
        """Iterate over pairs of intersecting segments

        >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)])
        >>> for segment1, segment2 in timeline1.co_iter(timeline2):
        ...     print(segment1, segment2)
        (<Segment(0, 2)>, <Segment(1, 3)>)
        (<Segment(1, 2)>, <Segment(1, 3)>)
        (<Segment(3, 4)>, <Segment(3, 5)>)

        Parameters
        ----------
        other : Timeline
            Second timeline

        Returns
        -------
        iterable : (Segment, Segment) iterable
            Yields pairs of intersecting segments in chronological order.
        """
        for segment in self.segments_list_:
            # iterate over segments that starts before 'segment' ends
            temp = Segment(start=segment.end, end=segment.end)
            for other_segment in other.segments_list_.irange(maximum=temp):
                if segment.intersects(other_segment):
                    yield segment, other_segment

    def crop_iter(self,
                  support: Support,
                  mode: CropMode = 'intersection',
                  returns_mapping: bool = False) \
            -> Iterator[Union[Tuple[Segment, Segment], Segment]]:
        """Like `crop` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.crop`
        """
        if mode not in {'loose', 'strict', 'intersection'}:
            raise ValueError("Mode must be one of 'loose', 'strict', or "
                             "'intersection'.")

        if not isinstance(support, (Segment, Timeline)):
            raise TypeError("Support must be a Segment or a Timeline.")

        if isinstance(support, Segment):
            # corner case where "support" is empty
            if support:
                segments = [support]
            else:
                segments = []

            support = Timeline(segments=segments, uri=self.uri)
            for yielded in self.crop_iter(support, mode=mode,
                                          returns_mapping=returns_mapping):
                yield yielded
            return

        # loose mode
        if mode == 'loose':
            for segment, _ in self.co_iter(support):
                yield segment
            return

        # strict mode
        if mode == 'strict':
            for segment, other_segment in self.co_iter(support):
                if segment in other_segment:
                    yield segment
            return

        # intersection mode
        for segment, other_segment in self.co_iter(support):
            mapped_to = segment & other_segment
            if not mapped_to:
                continue
            if returns_mapping:
                yield segment, mapped_to
            else:
                yield mapped_to

    def crop(self,
             support: Support,
             mode: CropMode = 'intersection',
             returns_mapping: bool = False) \
            -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]:
        """Crop timeline to new support

        Parameters
        ----------
        support : Segment or Timeline
            If `support` is a `Timeline`, its support is used.
        mode : {'strict', 'loose', 'intersection'}, optional
            Controls how segments that are not fully included in `support` are
            handled. 'strict' mode only keeps fully included segments. 'loose'
            mode keeps any intersecting segment. 'intersection' mode keeps any
            intersecting segment but replace them by their actual intersection.
        returns_mapping : bool, optional
            In 'intersection' mode, return a dictionary whose keys are segments
            of the cropped timeline, and values are list of the original
            segments that were cropped. Defaults to False.

        Returns
        -------
        cropped : Timeline
            Cropped timeline
        mapping : dict
            When 'returns_mapping' is True, dictionary whose keys are segments
            of 'cropped', and values are lists of corresponding original
            segments.

        Examples
        --------

        >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline.crop(Segment(1, 3))
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='loose')
        <Timeline(uri=None, segments=[<Segment(0, 2)>, <Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='strict')
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True)
        >>> print(mapping)
        {<Segment(1, 2)>: [<Segment(0, 2)>, <Segment(1, 2)>]}
        """
        if mode == 'intersection' and returns_mapping:
            segments, mapping = [], {}
            for segment, mapped_to in self.crop_iter(support,
                                                     mode='intersection',
                                                     returns_mapping=True):
                segments.append(mapped_to)
                # append in place rather than rebuilding the list each time
                mapping.setdefault(mapped_to, []).append(segment)
            return Timeline(segments=segments, uri=self.uri), mapping

        return Timeline(segments=self.crop_iter(support, mode=mode),
                        uri=self.uri)

    def overlapping(self, t: float) -> List[Segment]:
        """Get list of segments overlapping `t`

        Parameters
        ----------
        t : float
            Timestamp, in seconds.

        Returns
        -------
        segments : list
            List of all segments of timeline containing time t
        """
        return list(self.overlapping_iter(t))

    def overlapping_iter(self, t: float) -> Iterator[Segment]:
        """Like `overlapping` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.overlapping`
        """
        # upper bound for the sorted lookup: a zero-length segment at `t`
        upper_bound = Segment(start=t, end=t)
        for segment in self.segments_list_.irange(maximum=upper_bound):
            if segment.overlaps(t):
                yield segment

    def __str__(self):
        """Human-readable representation

        >>> timeline = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> print(timeline)
        [[ 00:00:00.000 -->  00:00:10.000]
         [ 00:00:01.000 -->  00:00:13.370]]
        """
        body = "\n ".join(str(segment) for segment in self.segments_list_)
        return "[" + body + "]"

    def __repr__(self):
        """Computer-readable representation

        >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        <Timeline(uri=None, segments=[<Segment(0, 10)>, <Segment(1, 13.37)>])>
        """
        return "<Timeline(uri=%s, segments=%s)>" % (self.uri,
                                                    list(self.segments_list_))

    def __contains__(self, included: Union[Segment, 'Timeline']):
        """Inclusion

        Check whether every segment of `included` does exist in timeline.

        Parameters
        ----------
        included : Segment or Timeline
            Segment or timeline being checked for inclusion

        Returns
        -------
        contains : bool
            True if every segment in `included` exists in timeline,
            False otherwise

        Raises
        ------
        TypeError if `included` is neither a Segment nor a Timeline.

        Examples
        --------
        >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> timeline2 = Timeline(segments=[Segment(0, 10)])
        >>> timeline1 in timeline2
        False
        >>> timeline2 in timeline1
        True
        >>> Segment(1, 13.37) in timeline1
        True
        """
        if isinstance(included, Segment):
            return included in self.segments_set_

        elif isinstance(included, Timeline):
            return self.segments_set_.issuperset(included.segments_set_)

        else:
            raise TypeError(
                'Checking for inclusion only supports Segment and '
                'Timeline instances')

    def empty(self) -> 'Timeline':
        """Return an empty copy

        Returns
        -------
        empty : Timeline
            Empty timeline using the same 'uri' attribute.
        """
        return Timeline(uri=self.uri)

    def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \
            -> 'Timeline':
        """Get a copy of the timeline

        If `segment_func` is provided, it is applied to each segment first.

        Parameters
        ----------
        segment_func : callable, optional
            Callable that takes a segment as input, and returns a segment.
            Defaults to identity function (segment_func(segment) = segment)

        Returns
        -------
        timeline : Timeline
            Copy of the timeline
        """
        # if segment_func is not provided
        # just add every segment
        if segment_func is None:
            return Timeline(segments=self.segments_list_, uri=self.uri)

        # if is provided
        # apply it to each segment before adding them
        return Timeline(segments=[segment_func(s) for s in self.segments_list_],
                        uri=self.uri)

    def extent(self) -> Segment:
        """Extent

        The extent of a timeline is the segment of minimum duration that
        contains every segments of the timeline. It is unique, by definition.
        The extent of an empty timeline is an empty segment.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.extent()
            |--------------------------------|

        Returns
        -------
        extent : Segment
            Timeline extent

        Examples
        --------
        >>> timeline = Timeline(segments=[Segment(0, 1), Segment(9, 10)])
        >>> timeline.extent()
        <Segment(0, 10)>
        """
        if self.segments_set_:
            segments_boundaries_ = self.segments_boundaries_
            start = segments_boundaries_[0]
            end = segments_boundaries_[-1]
            return Segment(start=start, end=end)
        else:
            import numpy as np
            return Segment(start=np.inf, end=-np.inf)

    def support_iter(self) -> Iterator[Segment]:
        """Like `support` but returns a segment generator instead

        See also
        --------
        :func:`pyannote.core.Timeline.support`
        """
        # The support of an empty timeline is an empty timeline.
        if not self:
            return

        # Principle:
        #   * gather all segments with no gap between them
        #   * add one segment per resulting group (their union |)
        # Note:
        #   Since segments are kept sorted internally,
        #   there is no need to perform an exhaustive segment clustering.
        #   We just have to consider them in their natural order.

        # Initialize new support segment
        # as very first segment of the timeline
        new_segment = self.segments_list_[0]

        for segment in self:

            # If there is no gap between new support segment and next segment,
            if not (segment ^ new_segment):
                # Extend new support segment using next segment
                new_segment |= segment

            # If there actually is a gap,
            else:
                yield new_segment

                # Initialize new support segment as next segment
                # (right after the gap)
                new_segment = segment

        # Add new segment to the timeline support
        yield new_segment

    def support(self) -> 'Timeline':
        """Timeline support

        The support of a timeline is the timeline with the minimum number of
        segments with exactly the same time span as the original timeline. It
        is (by definition) unique and does not contain any overlapping
        segments.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.support()
            |------|  |--------|  |----------|

        Returns
        -------
        support : Timeline
            Timeline support
        """
        return Timeline(segments=self.support_iter(), uri=self.uri)

    def duration(self) -> float:
        """Timeline duration

        The timeline duration is the sum of the durations of the segments
        in the timeline support.

        Returns
        -------
        duration : float
            Duration of timeline support, in seconds.
        """
        # The timeline duration is the sum of the durations
        # of the segments in the timeline support.
        return sum(s.duration for s in self.support_iter())

    def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]:
        """Like `gaps` but returns a segment generator instead

        See also
        --------
        :func:`pyannote.core.Timeline.gaps`
        """
        if support is None:
            support = self.extent()

        if not isinstance(support, (Segment, Timeline)):
            raise TypeError("unsupported operand type(s) for -':"
                            "%s and Timeline." % type(support).__name__)

        # segment support
        if isinstance(support, Segment):

            # `end` is meant to store the end time of former segment
            # initialize it with beginning of provided segment `support`
            end = support.start

            # support on the intersection of timeline and provided segment
            for segment in self.crop(support, mode='intersection').support():

                # add gap between each pair of consecutive segments
                # if there is no gap, segment is empty, therefore not added
                gap = Segment(start=end, end=segment.start)
                if gap:
                    yield gap

                # keep track of the end of former segment
                end = segment.end

            # add final gap (if not empty)
            gap = Segment(start=end, end=support.end)
            if gap:
                yield gap

        # timeline support
        elif isinstance(support, Timeline):

            # yield gaps for every segment in support of provided timeline
            for segment in support.support():
                for gap in self.gaps_iter(support=segment):
                    yield gap

    def gaps(self, support: Optional[Support] = None) \
            -> 'Timeline':
        """Gaps

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.gaps()
                   |--|        |--|

        Parameters
        ----------
        support : None, Segment or Timeline
            Support in which gaps are looked for. Defaults to timeline extent

        Returns
        -------
        gaps : Timeline
            Timeline made of all gaps from original timeline, and delimited
            by provided support

        See also
        --------
        :func:`pyannote.core.Timeline.extent`
        """
        return Timeline(segments=self.gaps_iter(support=support),
                        uri=self.uri)

    def segmentation(self) -> 'Timeline':
        """Segmentation

        Create the unique timeline with same support and same set of segment
        boundaries as original timeline, but with no overlapping segments.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.segmentation()
            |-|--|-|  |-|---|--|  |--|----|--|

        Returns
        -------
        timeline : Timeline
            (unique) timeline with same support and same set of segment
            boundaries as original timeline, but with no overlapping segments.
        """
        # an empty timeline has no boundaries: its segmentation is empty
        if not self:
            return Timeline(uri=self.uri)

        # COMPLEXITY: O(n)
        support = self.support()

        # COMPLEXITY: O(n.log n)
        # get all boundaries (sorted)
        # |------|    |------|     |----|
        #   |--|    |-----|     |----------|
        # becomes
        # | |  | |  | |   |  |  |  |    |  |
        timestamps = set()
        for (start, end) in self:
            timestamps.add(start)
            timestamps.add(end)
        timestamps = sorted(timestamps)

        # create new partition timeline
        # | |  | |  | |   |  |  |  |    |  |
        # becomes
        # |-|--|-|  |-|---|--|  |--|----|--|
        segments = []
        start = timestamps[0]
        for end in timestamps[1:]:
            # only add segments that are covered by original timeline
            segment = Segment(start=start, end=end)
            if segment and support.overlapping(segment.middle):
                segments.append(segment)
            # next segment...
            start = end

        return Timeline(segments=segments, uri=self.uri)

    def to_annotation(self,
                      generator: Union[str, Iterable[Label], None] = 'string',
                      modality: Optional[str] = None) \
            -> 'Annotation':
        """Turn timeline into an annotation

        Each segment is labeled by a unique label.

        Parameters
        ----------
        generator : 'string', 'int', or iterable, optional
            If 'string' (default) generate string labels. If 'int', generate
            integer labels. If iterable, use it to generate labels.
        modality : str, optional

        Returns
        -------
        annotation : Annotation
            Annotation
        """
        from .annotation import Annotation
        annotation = Annotation(uri=self.uri, modality=modality)

        if generator == 'string':
            from .utils.generators import string_generator
            generator = string_generator()
        elif generator == 'int':
            from .utils.generators import int_generator
            generator = int_generator()
        elif generator is not None and not hasattr(generator, '__next__'):
            # plain iterables (e.g. a list of labels) must be turned into an
            # iterator before `next` can be called on them
            generator = iter(generator)

        for segment in self:
            annotation[segment] = next(generator)

        return annotation

    def write_uem(self, file: TextIO):
        """Dump timeline to file using UEM format

        Parameters
        ----------
        file : file object

        Usage
        -----
        >>> with open('file.uem', 'w') as file:
        ...    timeline.write_uem(file)
        """
        uri = self.uri if self.uri else "<NA>"

        for segment in self:
            line = f"{uri} 1 {segment.start:.3f} {segment.end:.3f}\n"
            file.write(line)

    def for_json(self):
        """Serialization

        See also
        --------
        :mod:`pyannote.core.json`
        """
        data = {PYANNOTE_JSON: self.__class__.__name__}
        data[PYANNOTE_JSON_CONTENT] = [s.for_json() for s in self]

        if self.uri:
            data[PYANNOTE_URI] = self.uri

        return data

    @classmethod
    def from_json(cls, data):
        """Deserialization

        See also
        --------
        :mod:`pyannote.core.json`
        """
        uri = data.get(PYANNOTE_URI, None)
        segments = [Segment.from_json(s) for s in data[PYANNOTE_JSON_CONTENT]]
        return cls(segments=segments, uri=uri)

    def _repr_png_(self):
        """IPython notebook support

        See also
        --------
        :mod:`pyannote.core.notebook`
        """
        from .notebook import repr_timeline
        return repr_timeline(self)
def test_index_valueerror6():
    """``index`` raises ValueError when the value lies before ``start``.

    The value 3 is stored in the list, but the search only begins at
    position 5.
    """
    values = range(10)
    slt = SortedList(values)
    # Private helper of SortedList; presumably lowers the internal load so
    # the values span several sublists -- confirm against sortedcontainers.
    slt._reset(4)

    with pytest.raises(ValueError):
        slt.index(3, 5)