class RangeModule:
    """Track a set of half-open integer ranges ``[left, right)``.

    The covered set is stored as one flat sorted list of boundary points:
    the value at an even index opens a covered range and the value at the
    following odd index closes it.
    """

    def __init__(self):
        self.nums = SortedList()

    def _rewrite(self, left: int, right: int, parity: int) -> None:
        """Drop every boundary in ``[left, right]`` and re-add the ends.

        An end point is re-added only when the index at which it would be
        inserted has the given ``parity`` (0 when adding, 1 when removing).
        """
        lo = self.nums.bisect_left(left)
        hi = self.nums.bisect_right(right)
        # Boundaries swallowed by the new range are no longer meaningful.
        del self.nums[lo:hi]
        if lo % 2 == parity:
            self.nums.add(left)
        if hi % 2 == parity:
            self.nums.add(right)

    def addRange(self, left: int, right: int) -> None:
        """Mark ``[left, right)`` as covered."""
        self._rewrite(left, right, 0)

    def queryRange(self, left: int, right: int) -> bool:
        """Return True when every point of ``[left, right)`` is covered."""
        after_left = self.nums.bisect_right(left)
        before_right = self.nums.bisect_left(right)
        if after_left != before_right:
            # A boundary lies strictly inside the queried range.
            return False
        # An odd index means we are between an opening and a closing point.
        return after_left % 2 != 0

    def removeRange(self, left: int, right: int) -> None:
        """Mark ``[left, right)`` as not covered."""
        self._rewrite(left, right, 1)
class Novels:
    """Catalogue of the novels found under ``base`` plus a hit ranking.

    ``base``, ``listdir``, ``Novel`` and ``SortedList`` are expected to be
    provided by the enclosing module.
    """

    def __init__(self):
        self.__base = base
        self.novels = listdir(self.__base)
        # Entries are mutable ``[name, hits]`` pairs ordered by descending
        # hit count.  (Original author's note on this container:
        # "red-black tree".)
        self.__rank = SortedList(key=lambda t: -t[1])
        for no in self.novels:
            self.__rank.add([no, 0])

    def novel_valid(self, name) -> bool:
        """Return True when ``name`` is one of the listed novels."""
        return name in self.novels

    def get_novel(self, name) -> Novel or None:
        """Return a ``Novel`` for ``name``, or None when it is not listed."""
        if name not in self.novels:
            return None
        else:
            return Novel(name)

    def add_rank(self, novel_):
        """Increase the hit count of ``novel_`` by one.

        Does nothing when ``novel_`` has no ranking entry.
        """
        for i in range(len(self.__rank)):
            if self.__rank[i][0] == novel_:
                n = self.__rank[i][1] + 1
                # Remove and re-insert so the entry lands at the position
                # matching its new count.
                self.__rank.pop(i)
                self.__rank.add([novel_, n])
                # The entry has been handled; scanning the rest of the
                # ranking would only waste time.
                break

    def novels_popular(self):
        """Return the first 16 ranking entries (highest hit counts first)."""
        return self.__rank[0:16]

    def novels_rank(self):
        """Return the whole ranking as a plain list."""
        return list(self.__rank)
class RangeList:
    """Maintain a sorted collection of non-overlapping intervals.

    Intervals are half-open ``(start, end)`` tuples, so ``(1, 3)`` covers
    the points 1 and 2.  Entries are ordered by their start value.
    """

    def __init__(self, init=None):
        self.data = SortedList(init, key=lambda x: x[0])

    def add(self, start, end):
        """Insert ``[start, end)``, merging it with intervals it touches."""
        lo = self.data.bisect_right((start, 0))
        if lo > 0 and self.data[lo - 1][1] >= start:
            # The predecessor reaches into (or touches) the new interval:
            # absorb it and extend the new interval to its start.
            lo -= 1
            start = self.data[lo][0]

        hi = self.data.bisect_right((end, 0))
        if hi > 0 and self.data[hi - 1][1] >= end:
            # The last interval starting at or before ``end`` extends
            # further right; keep its end.
            end = self.data[hi - 1][1]

        # Everything in [lo, hi) is now covered by the merged interval.
        remaining = hi - lo
        while remaining > 0:
            self.data.pop(lo)
            remaining -= 1
        self.data.add((start, end))

    def list(self):
        """Return the intervals as a plain list."""
        return list(self.data)

    def iter(self):
        """Return an iterator over the intervals in sorted order."""
        return self.data.islice(start=0)
class OrderBook(object):
    """Matching book holding resting buy and sell orders.

    Resting orders live in two sorted lists (ordered by the orders' own
    comparison) and are also indexed by ``order_id`` for cancellation.
    """

    def __init__(self):
        self.buy_orders = SortedList()
        self.sell_orders = SortedList()
        self.buy_mapping = {}
        self.sell_mapping = {}

    def add_order(self, order):
        """Match ``order`` against the opposite side and rest any remainder.

        Returns the list of trades produced.  Raises
        ``DuplicateOrderException`` when the order id is already indexed.
        """
        order_id = order.order_id
        if order_id in self.buy_mapping or order_id in self.sell_mapping:
            raise DuplicateOrderException
        if order.order_side == ORDER_BUY_SIDE:
            handler = self._try_buy
        else:
            handler = self._try_sell
        return handler(order)

    def cancel_order(self, order_id):
        """Remove the order with ``order_id`` from the book.

        Raises ``UnknownOrderException`` when the id is not indexed.
        """
        sides = (
            (self.buy_mapping, self.buy_orders),
            (self.sell_mapping, self.sell_orders),
        )
        for mapping, book in sides:
            if order_id in mapping:
                book.discard(mapping[order_id])
                del mapping[order_id]
                return
        raise UnknownOrderException

    def _try_buy(self, order):
        """Fill a buy order against the sell side; rest what is left."""
        fills = []
        asks = self.sell_orders
        while asks and order.price >= asks[0].price and order.quantity > 0:
            best = asks[0]
            fills.append(self._trade(order, best))
            # NOTE(review): a fully filled resting order is dropped from the
            # sorted list but stays in sell_mapping - confirm this is wanted.
            if best.quantity == 0:
                asks.pop(0)
        if order.quantity > 0:
            self.buy_orders.add(order)
            self.buy_mapping[order.order_id] = order
        return fills

    def _trade(self, order, resting_order):
        """Execute the largest possible fill at the resting order's price."""
        price = resting_order.price
        quantity = min(resting_order.quantity, order.quantity)
        resting_order.quantity -= quantity
        order.quantity -= quantity
        return Trade(quantity, price, order, resting_order)

    def _try_sell(self, order):
        """Fill a sell order against the buy side; rest what is left."""
        fills = []
        bids = self.buy_orders
        while bids and order.price <= bids[0].price and order.quantity > 0:
            best = bids[0]
            fills.append(self._trade(order, best))
            # NOTE(review): as on the sell side, the filled order stays in
            # buy_mapping - confirm this is wanted.
            if best.quantity == 0:
                bids.pop(0)
        if order.quantity > 0:
            self.sell_orders.add(order)
            self.sell_mapping[order.order_id] = order
        return fills
def solve(n, k):
    """Return ``(larger, smaller)`` halves left by the k-th split.

    Starting from one run of length ``n``, the longest run is repeatedly
    split around its middle element: a run of even length ``L`` leaves
    runs of ``L // 2`` and ``L // 2 - 1``, a run of odd length leaves two
    runs of ``L // 2``.  The result describes the two runs produced by the
    ``k``-th split.

    Args:
        n: initial run length.
        k: 1-based index of the split of interest.

    Returns:
        A tuple of two ints, largest first.  ``(0, 0)`` when ``k`` exceeds
        two thirds of ``n``.
    """
    import heapq

    # Integer form of ``k > 2 * n / 3``; avoids float comparison.
    if 3 * k > 2 * n:
        return 0, 0

    # heapq is a min-heap, so lengths are stored negated to pop the longest.
    runs = [-n]
    for _ in range(k - 1):
        longest = -heapq.heappop(runs)
        # Floor division: true division would put floats into the heap on
        # Python 3 and corrupt every later parity test.
        half = longest // 2
        heapq.heappush(runs, -half)
        if longest % 2 == 0:
            heapq.heappush(runs, -(half - 1))
        else:
            heapq.heappush(runs, -half)

    longest = -heapq.heappop(runs)
    half = longest // 2
    if longest == 0:
        x = y = 0
    elif longest % 2 == 0:
        x = half
        y = half - 1
    else:
        x = y = half
    return (max(x, y), min(x, y))
def bottom_sketch(seq, k, s, hf=mmhash):
    """Compute the bottom-s sketch of the sequence using k-mers.

    Arguments:
        seq (bytes): Sequence to be sketched.
        k (int): k-mer size.
        s (int): Sketch size, i.e. the number of hash values kept in the
            minimums list.
        hf (function): Hash function applied to each k-mer.  Defaults to
            the module-level ``mmhash``.

    Returns:
        SortedList: the s smallest hash values.  When the sequence yields
        fewer than s k-mers, all of their hash values are returned.
    """
    from itertools import islice

    kmers = iter(dp.qgrams(seq, k))

    # Seed the sketch with the first s hash values.  islice stops quietly
    # when the sequence is too short instead of leaking StopIteration.
    mins = SortedList(hf(kmer) for kmer in islice(kmers, max(s, 0)))
    if not mins:
        # s == 0 or no k-mers at all: nothing to compare against.
        return mins
    biggest_min = mins[-1]

    # Traverse the remaining k-mers.
    for kmer in kmers:
        hv = hf(kmer)
        if hv < biggest_min:
            # The new hash value beats the largest value currently kept:
            # replace that one and refresh the threshold.
            mins.pop()  # removes the largest kept value (last position)
            mins.add(hv)
            biggest_min = mins[-1]
    return mins
class TransactionPool:
    """Bounded pool of transactions ordered by ascending ``gas_price``."""

    def __init__(self, limit=1_000_000):
        self._pool = SortedList(key=lambda t: t.gas_price)
        self.limit = limit

    def add_txs(self, txs):
        """Add ``txs``; evict the cheapest entries beyond ``limit``."""
        self._pool.update(txs)
        if len(self._pool) > self.limit:
            overflow = len(self._pool) - self.limit
            # Lowest gas prices sit at the front of the sorted pool.
            del self._pool[:overflow]

    def pop_most_valuable_txs(self, total_gas_target=None):
        """Remove and return the highest-priced transaction(s).

        Args:
            total_gas_target: ``None`` to pop a single transaction, or a
                positive int capping the summed ``gas_used`` of the
                returned transactions.

        Returns:
            A single transaction when ``total_gas_target`` is ``None``,
            otherwise a list of transactions in descending price order.
            Popping stops at the first transaction that would exceed the
            target.

        Raises:
            Exception: when ``total_gas_target`` is neither ``None`` nor a
                positive int.
            IndexError: when popping a single transaction from an empty
                pool.
        """
        if total_gas_target is None:
            return self._pool.pop()
        if not (isinstance(total_gas_target, int) and total_gas_target > 0):
            raise Exception("Invalid gas target")

        result = []
        # Running total instead of re-summing ``result`` on every step.
        gas_so_far = 0
        while self._pool:
            next_gas = self._pool[-1].gas_used
            if gas_so_far + next_gas > total_gas_target:
                break
            result.append(self._pool.pop())
            gas_so_far += next_gas
        return result

    def get_size(self):
        """Return the number of pooled transactions."""
        return len(self._pool)
def busiestServers(self, k: int, arrival: List[int], load: List[int]) -> List[int]:
    """Return the ids of the servers that handled the most requests.

    Request ``i`` goes to the first idle server with id >= ``i % k``,
    wrapping around to the lowest idle id; it is dropped when no server is
    idle at its arrival time.
    """
    from sortedcontainers import SortedList

    busy = []                       # min-heap of [finish_time, server_id]
    idle = SortedList(range(k))     # ids of currently idle servers
    handled = [0] * k

    for i, start in enumerate(arrival):
        duration = load[i]

        # Release every server whose job finished by the arrival time.
        while busy and busy[0][0] <= start:
            idle.add(heapq.heappop(busy)[1])

        if not idle:
            continue

        pos = idle.bisect_left(i % k)
        if pos == len(idle):
            pos = 0                 # wrap around to the smallest idle id
        server = idle[pos]
        handled[server] += 1
        heapq.heappush(busy, [start + duration, server])
        idle.pop(pos)

    top = max(handled)
    return [server for server in range(k) if handled[server] == top]
def thirdMax2(self, nums: List[int]) -> int:
    """Return the third largest distinct value, or the largest if fewer than three exist."""
    top = SortedList()  # at most the three largest distinct values seen
    for value in nums:
        if value in top:
            continue
        top.add(value)
        if len(top) > 3:
            top.pop(0)  # discard the smallest of the four
    if len(top) == 3:
        return top[0]
    return top[-1]
class OnlineMedianFinder:
    """Maintain a running median under insertions and removals.

    Values are split into ``smallElements`` (lower part) and
    ``bigElements`` (upper part).  ``balance`` records which part holds
    one extra element: ``+1`` for big, ``-1`` for small, ``0`` when both
    have the same size.  ``currentMedian`` is the extra element when the
    sizes differ and ``bigElements[0]`` when they match; it is ``0`` while
    the structure is empty.
    """

    def __init__(self):
        self.currentMedian = 0
        self.bigElements = SortedList()
        self.smallElements = SortedList()
        self.balance = 0

    def add_element(self, newval):
        """Insert ``newval`` and update ``currentMedian``."""
        if self.balance == 0:
            # Equal sizes: the receiving part becomes the larger one and
            # supplies the median.
            if newval < self.currentMedian:
                self.smallElements.add(newval)
                self.currentMedian = self.smallElements[-1]
                self.balance = -1
            else:
                self.bigElements.add(newval)
                self.currentMedian = self.bigElements[0]
                self.balance = +1
            return
        elif self.balance == +1:
            if newval <= self.currentMedian:
                self.smallElements.add(newval)
            else:
                # Big would end up two ahead; hand its minimum to small.
                self.bigElements.add(newval)
                self.smallElements.add(self.bigElements.pop(0))
        elif self.balance == -1:
            if newval > self.currentMedian:
                self.bigElements.add(newval)
            else:
                # Small would end up two ahead; hand its maximum to big.
                self.smallElements.add(newval)
                self.bigElements.add(self.smallElements.pop(-1))
        self.currentMedian = self.bigElements[0]
        self.balance = 0

    def remove_element(self, oldval):
        """Remove one occurrence of ``oldval`` and update ``currentMedian``.

        Raises:
            ValueError: when ``oldval`` is not stored.
        """
        if self.balance == 0:
            if oldval < self.currentMedian:
                self.smallElements.remove(oldval)
                self.currentMedian = self.bigElements[0]
                self.balance = +1
            else:
                self.bigElements.remove(oldval)
                self.currentMedian = self.smallElements[-1]
                self.balance = -1
            return
        elif self.balance == +1:
            if oldval < self.currentMedian:
                self.smallElements.remove(oldval)
                self.smallElements.add(self.bigElements.pop(0))
            else:
                self.bigElements.remove(oldval)
        elif self.balance == -1:
            # Here the median is smallElements[-1].  Only values strictly
            # above it are guaranteed to live in bigElements; a value equal
            # to the median is (at least) in smallElements.
            if oldval > self.currentMedian:
                self.bigElements.remove(oldval)
                self.bigElements.add(self.smallElements.pop(-1))
            else:
                self.smallElements.remove(oldval)
        if self.bigElements:
            self.currentMedian = self.bigElements[0]
        else:
            # The last element was removed: return to the initial state.
            self.currentMedian = 0
        self.balance = 0
class MAE_AVL(object):
    """Running median over a multiset, with per-half sums and counts.

    The values are kept in two sorted lists: ``less_than`` holds the lower
    half (one element more when the total is odd) and ``greater_than`` the
    upper half.
    """

    def __init__(self, y):
        """Build the two halves from the iterable ``y``."""
        y = sorted(y)
        self.median = np.median(y)
        # The lower half receives the extra element for odd lengths.
        idx = int(math.ceil(len(y) / 2.))
        self.less_than = SortedList()
        self.less_than.update(y[:idx])
        self.less_than_sum = sum(y[:idx])
        self.less_than_items = len(self.less_than)
        self.greater_than = SortedList()
        self.greater_than.update(y[idx:])
        self.greater_than_sum = sum(y[idx:])
        self.greater_than_items = len(self.greater_than)

    def _update(self, values, add_or_remove_fn, add_or_subtract_values):
        """Apply an add/remove operation for ``values`` and rebalance.

        ``add_or_remove_fn`` is called as ``fn(sorted_list, value)`` and
        ``add_or_subtract_values`` as ``op(current, delta)`` to keep the
        sums and counts in step with the list operation.
        """
        for value in values:
            # Values not above the largest lower-half element belong to
            # the lower half; everything else to the upper half.
            if self.less_than and value <= self.less_than[-1]:
                add_or_remove_fn(self.less_than, value)
                self.less_than_sum = add_or_subtract_values(self.less_than_sum, value)
                self.less_than_items = add_or_subtract_values(self.less_than_items, 1)
            else:
                add_or_remove_fn(self.greater_than, value)
                self.greater_than_sum = add_or_subtract_values(self.greater_than_sum, value)
                self.greater_than_items = add_or_subtract_values(self.greater_than_items, 1)
        # Restore the size invariant:
        # len(greater_than) <= len(less_than) <= len(greater_than) + 1.
        if len(self.less_than) > len(self.greater_than)+1:
            while len(self.less_than) > len(self.greater_than)+1:
                # Move the largest lower-half value up.
                x = self.less_than.pop(index=-1)
                self.greater_than.add(x)
                self.less_than_sum -= x
                self.greater_than_sum += x
                self.less_than_items -= 1
                self.greater_than_items += 1
        elif len(self.greater_than) > len(self.less_than):
            while len(self.greater_than) > len(self.less_than):
                # Move the smallest upper-half value down.
                x = self.greater_than.pop(index=0)
                self.less_than.add(x)
                self.less_than_sum += x
                self.greater_than_sum -= x
                self.less_than_items += 1
                self.greater_than_items -= 1
        # Odd total: the extra lower-half element is the median; even
        # total: average of the two middle values.
        # NOTE(review): both branches index into the halves, so this looks
        # like it raises IndexError once every value is removed - confirm.
        if len(self.less_than) > len(self.greater_than):
            self.median = self.less_than[-1]
        else:
            self.median = (self.less_than[-1] + self.greater_than[0]) / 2.

    def remove(self, values):
        """Remove each of ``values`` and refresh the median."""
        self._update(values, SortedList.remove, operator.sub)

    def add(self, values):
        """Add each of ``values`` and refresh the median."""
        self._update(values, SortedList.add, operator.add)
class Population:
    """Specimens kept sorted by ascending loss.

    Every stored entry is a ``(loss_value, specimen)`` tuple.
    """

    def __init__(self, display_strategy: DisplayStrategy, loss: Loss, storage: Iterable[np.ndarray] = None):
        """Encode and score the initial specimens.

        Args:
            display_strategy: object whose ``encode`` converts each
                initial element before it is stored.
            loss: object whose ``call`` scores a specimen.
            storage: optional initial elements; ``None`` starts empty.
        """
        self.__loss = loss
        # Check for None before iterating: the default must give an empty
        # population rather than a TypeError.
        if storage is None:
            encoded = []
        else:
            encoded = [display_strategy.encode(element) for element in storage]
        self.__storage = SortedList([(self.loss.call(element), element) for element in encoded],
                                    key=lambda x: x[0])

    @property
    def loss(self):
        """The loss object used to score specimens."""
        return self.__loss

    @property
    def count(self):
        """Number of stored specimens."""
        return len(self.__storage)

    def add(self, specimen: np.ndarray):
        """Score ``specimen`` and store it."""
        self.add_raw(entry=(self.loss.call(specimen), specimen))

    def add_raw(self, entry: Tuple[float, np.ndarray]):
        """Store an already scored ``(loss_value, specimen)`` entry."""
        self.__storage.add(entry)

    def kill(self, index: int = -1):
        """Delete the entry at ``index`` (default: the highest loss)."""
        del self.__storage[index]

    def kill_many(self, indices):
        """Delete the entries at the given distinct indices."""
        count = 0
        for index in sorted(set(indices)):
            # Earlier deletions shifted the later positions left.
            del self.__storage[index - count]
            count += 1

    def kill_last(self, count: int = 1):
        """Delete up to ``count`` entries from the high-loss end.

        Raises:
            RuntimeError: when ``count`` is negative.
        """
        if count < 0:
            raise RuntimeError(f"Can't kill the last {count} specimens, it's a negative number!")
        count = min(len(self.__storage), count)
        for _ in range(count):
            self.kill()

    def pop(self, index: int = -1):
        """Remove the entry at ``index`` and return it."""
        return self.__storage.pop(index)

    def __getitem__(self, key):
        """Return the entry for an int, a list for a slice or an iterable of indices."""
        if isinstance(key, int):
            return self.__storage[key]
        elif isinstance(key, slice):
            return self.__storage[key.start: key.stop: key.step]
        else:
            return [self.__storage[entry] for entry in key]
class SortedTradesLevel(DequeLevel):
    """Level whose trades are kept ordered by their ``T_PRICE`` field."""

    def __init__(self):
        super().__init__()

        def price_of(trade):
            return trade[T_PRICE]

        # Replaces the container with a price-ordered one.
        self.orders = SortedList(key=price_of)

    def _add(self, order: list):
        """Insert ``order`` at its price-ordered position."""
        self.orders.add(order)

    def _remove_first(self):
        """Drop one trade from the level."""
        # pop() without an index removes the last entry, i.e. the trade
        # with the highest price.
        self.orders.pop()
def lis(x, nd=False, inv=False):
    """Return the length of the longest monotone subsequence of ``x``.

    Args:
        x: iterable of comparable (and, with ``inv``, negatable) values.
        nd: when true, equal neighbours are allowed in the subsequence.
        inv: when true, values are ordered by their negation, i.e. the
            subsequence runs in the opposite direction.
    """
    from sortedcontainers import SortedList

    if inv:
        tails = SortedList(key=lambda v: -v)
    else:
        tails = SortedList()

    for item in x:
        tails.add(item)
        # Index of the entry that the new item supersedes: the first one
        # after the item (after all its equals when ``nd`` is set).
        if nd:
            successor = tails.bisect_right(item)
        else:
            successor = tails.bisect_left(item) + 1
        if successor < len(tails):
            tails.pop(successor)
    return len(tails)
def check(k):
    """Return True when the last ``k`` workers can complete ``tasks[:k]``.

    Reads ``workers``, ``tasks``, ``pills`` and ``strength`` from the
    enclosing scope.  Tasks are processed from index ``k - 1`` down to 0;
    each is given to the strongest remaining worker when that worker is
    strong enough, otherwise to the weakest worker who manages it with a
    pill.
    """
    w = SortedList(workers[-k:])
    cnt = pills  # pills still available for this attempt
    for i in range(k - 1, -1, -1):
        if w[-1] >= tasks[i]:
            w.pop()
        elif cnt and (idx := w.bisect_left(tasks[i] - strength)) != len(w):
            w.pop(idx)
            cnt -= 1
        else:
            return False
    # Every task found a worker.  Without this the function fell off the
    # end and reported success as a falsy None.
    return True
def test_pop():
    """pop() returns the value at the requested index and keeps the list valid."""
    slt = SortedList(range(10), load=4)
    slt._check()
    # (positional arguments for pop, expected return value)
    cases = (((), 9), ((0,), 0), ((-2,), 7), ((4,), 5))
    for args, expected in cases:
        assert slt.pop(*args) == expected
        slt._check()
def minMeetingRooms(self, intervals: List[List[int]]) -> int:
    """Return the maximum number of simultaneously running meetings.

    ``intervals`` is sorted in place.  A meeting ending at time ``t`` does
    not clash with one starting at ``t``.
    """
    intervals.sort()
    running_ends = SortedList()  # end times of meetings still in progress
    rooms = 0
    for start, end in intervals:
        # Meetings that ended by ``start`` free their room.
        while running_ends and running_ends[0] <= start:
            running_ends.pop(0)
        running_ends.add(end)
        rooms = max(rooms, len(running_ends))
    return rooms
def lastStoneWeight(self, stones: List[int]) -> int:
    """Smash the two heaviest stones until at most one is left.

    Two equal stones destroy each other; otherwise the difference of
    their weights goes back into the pile.

    Returns:
        The weight of the remaining stone, or 0 when none is left.  The
        input list is not modified.
    """
    import heapq

    # heapq is a min-heap; store negated weights to pop the heaviest.
    heap = [-weight for weight in stones]
    heapq.heapify(heap)
    while len(heap) > 1:
        x = -heapq.heappop(heap)
        y = -heapq.heappop(heap)
        if x != y:
            heapq.heappush(heap, -(x - y))
    # An empty pile must yield 0, not None.
    return -heap[0] if heap else 0
def main():
    """Score team combinations and write the best ones to the results file.

    Driven entirely by module-level settings (``resumethreads``,
    ``seededchoices_names``, ``setsize``, ``maxresultsize``, the various
    ``*_path`` values, ...), which are defined outside this function.
    """
    # Either build the processed CSV from the raw stats or load a
    # previously processed one.
    if not resumethreads:
        preprocesscsvdata(unprocessed_stats_path,processed_stats_path)
    else:
        loadprocessedcsvdata(processed_stats_path)
    # Translate preselected names into row indices of the processed data.
    if len(seededchoices_names) > 0:
        nameidx = processedheaders.index('Name')
        for seedname in seededchoices_names:
            foundname = False
            # Row 0 is skipped; stored indices are shifted down by one
            # accordingly (presumably row 0 is the header - confirm).
            for i in range(1,len(processedcsv)):
                rowname = processedcsv[i][nameidx]
                if rowname == seedname:
                    seededchoices.append(i-1)
                    foundname = True
                    break
            if not foundname:
                print('Couldnt find ' + seedname + ' in csv data')
    teamstatsScoreidx = teamstatsheaders.index('Score')
    # Position of the score within a result row: offset by setsize.
    sortedScoreidx = setsize + teamstatsScoreidx
    # Candidate indices in descending order, one per non-preselected row.
    subsetpool = list(range(len(processedcsv) - 1 - len(seededchoices)))[::-1]
    totalcombinations = IndexedCombination( setsize - len(seededchoices), subsetpool).totalcombinations
    if len(seededchoices) > 0:
        print(str(int(totalcombinations)) + ' combinations using ' + str(len(processedcsv) - 1 ) + ' pkmn and ' + str(len(seededchoices)) + '/6 preselected')
    else:
        print(str(int(totalcombinations)) + ' combinations using ' + str(len(processedcsv) - 1 ) + ' pkmn')
    # Highest score first.
    bestScores = SortedList(key=lambda x: -x[sortedScoreidx])
    # Single-"thread" run: thread 0 of 1 covers the whole index range.
    nthreads = 1
    nthread = 0
    logfilepath = 'resumedata.' + str(nthread) + '.pickle'
    threaddata = [[]] * nthreads
    istart = int( nthread*totalcombinations/nthreads)
    iend = int( (nthread + 1)*totalcombinations/nthreads)
    # [first index, end index, is-last-thread flag, resume log path]
    setrange = [istart, iend, nthread == nthreads - 1, logfilepath]
    #print( 'thread ' + str(nthread+1) + ' processing ' + str(setrange))
    threadres = processthread(setrange)
    #print( 'thread ' + str(nthread+1) + ' finished')
    # Merge the results, keeping at most maxresultsize best entries.
    for someset in threadres[0]:
        bestScores.add(someset)
        if len(bestScores) > maxresultsize:
            # The last entry is the one with the lowest score.
            bestScores.pop()
    printSortedList(os.path.join(currentdir,team_results_path), bestScores)
    print('done! ')
def check(tasks, workers, pills, strength, x):
    """Return True when ``workers[-x:]`` can complete ``tasks[-x:]``.

    Each task goes to the weakest remaining worker able to do it unaided;
    failing that, to the weakest worker able to do it with one of the
    ``pills`` (each adds ``strength``).
    """
    pool = SortedList(workers[-x:])
    for task in tasks[-x:]:
        pos = pool.bisect_left(task)
        if pos == len(pool):
            # Nobody is strong enough unaided; try a pill.
            if not pills:
                return False
            pos = pool.bisect_left(task - strength)
            if pos == len(pool):
                return False
            pills -= 1
        pool.pop(pos)
    return True
class MaxBuffer:
    """Bounded buffer of filtered items ordered by descending ``value``."""

    def __init__(self, size: int, filter_size: int):
        self.size = size
        self.filter_size = filter_size
        self.queue = MaxQueue(filter_size)
        # Negated key: index 0 holds the item with the largest value.
        self.data = SortedList(key=lambda v: -v.value)
        self.worst = None
        self.last_item: Optional[MaxBufferItem] = None

    def append(self, value: float, frame_no: int, data: Any) -> None:
        """Push a sample through the filter queue and store what comes out."""
        candidate = MaxBufferItem(frame_no, value, data)
        filtered = self.queue.push(candidate)
        if filtered is None:
            return
        self.data.add(filtered)
        if len(self.data) > self.size:
            # NOTE(review): with the descending key this discards the item
            # with the LARGEST value - confirm that is the intent.
            self.data.pop(0)
def score_of_a_vacated_people(self, universo, work='translations'):
    """Return the first ``TranslatorScore`` in sort order among ``universo``.

    Scores come from ``self.__scores__()``; only people contained in
    ``universo`` are considered and ``work`` selects the score field.
    Raises IndexError when nobody qualifies.
    """
    load = round(math.sqrt(len(universo)))
    ranked = SortedList(load=load)
    for people, score in self.__scores__().items():
        if people not in universo:
            continue
        ranked.add(TranslatorScore(people, score[work]))
    return ranked.pop(0)
def uniform_cost_graph(problem, iteration_limit=10000):
    """Uniform-cost graph search over ``problem``.

    Returns the solution of the first goal node taken from the frontier.
    Raises ``NoSolutionError`` when the frontier runs dry or more than
    ``iteration_limit`` nodes have been expanded.
    """
    root = Node(problem.initial_state(), 0)
    if problem.goal_test(root.state):
        return root.solution()

    # Negated cost: pop() (from the end) yields the cheapest node.
    frontier = SortedList([root], key=lambda n: -n.path_cost)
    explored = set()
    iterations = 0

    while frontier:
        node = frontier.pop()
        if problem.goal_test(node.state):
            return node.solution()
        explored.add(node.state)

        for action in problem.actions(node.state):
            child = node.get_child(problem, action)
            frontier_states = [n.state for n in frontier]
            if child.state not in frontier_states:
                if child.state not in explored:
                    frontier.add(child)
                continue
            # The state is already queued: keep whichever path is cheaper.
            rival = frontier[frontier_states.index(child.state)]
            if child.path_cost < rival.path_cost:
                frontier.remove(rival)
                frontier.add(child)

        iterations += 1
        if iterations > iteration_limit:
            raise NoSolutionError(iterations=iterations, iteration_limited=True)

    raise NoSolutionError(iterations=iterations)
class FreqStack:
    """Stack whose ``pop`` returns the most frequent value.

    Ties are broken in favour of the value pushed most recently.
    """

    def __init__(self):
        # One (count, index_of_latest_push, value) entry per stored value.
        self.sl = SortedList()
        self.i = 0                     # running push counter
        self.pos = defaultdict(list)   # value -> push indices still stored

    def push(self, x: int) -> None:
        """Push ``x``."""
        history = self.pos[x]
        if history:
            # Replace the value's entry with one reflecting the new count
            # and latest push index.
            self.sl.remove((len(history), history[-1], x))
            self.sl.add((len(history) + 1, self.i, x))
            history.append(self.i)
        else:
            self.sl.add((1, self.i, x))
            self.pos[x] = [self.i]
        self.i += 1

    def pop(self) -> int:
        """Remove and return the most frequent value."""
        count, _, x = self.sl.pop()
        history = self.pos[x]
        history.pop()
        if history:
            # Re-insert with the count and latest index of what remains.
            self.sl.add((count - 1, history[-1], x))
        return x
def solve(n, k):
    """Split the largest slot ``k`` times and return the last split, sorted.

    ``best`` (defined elsewhere) maps a slot size to the parts it leaves;
    parts equal to 0 are not put back.
    """
    slots = SortedList([n])
    for _ in range(k):
        largest = slots.pop()
        res = best(largest)
        slots.update(part for part in res if part != 0)
    return sorted(res)
class FreeListAllocator:
    """Allocator that hands out single indices from a fixed-size block.

    Rather than tracking contiguous ranges, it simply keeps a sorted list
    of the indices that are currently unallocated.
    """

    def __init__(self, capacity: int = 8):
        self.capacity = capacity
        self.freelist = SortedList(range(capacity))

    def alloc(self) -> int:
        """Return the lowest free index.

        Raises ``NoCapacity`` (with a ``recommended`` new capacity) when
        no index is free.
        """
        if self.freelist:
            return self.freelist.pop(0)
        # Suggest growing by half the current capacity, at least by one.
        growth = max(self.capacity // 2, 1)
        err = NoCapacity()
        err.recommended = self.capacity + growth
        raise err

    def grow(self, new_capacity: int):
        """Register the indices between the old and the new capacity as free."""
        added = range(self.capacity, new_capacity)
        self.freelist.update(added)
        self.capacity = new_capacity

    def release(self, idx: int):
        """Return ``idx`` to the free list."""
        assert idx not in self.freelist
        self.freelist.add(idx)
class FreqStack:
    """Stack whose ``pop`` returns the most frequent value.

    Ties are broken in favour of the value pushed most recently.  Each
    stored value has one key ``(freq, last_occurrence, value)``.
    """

    def __init__(self):
        # occurences[v] = [1, 5, 6] means v was pushed at steps 1, 5 and 6.
        self.occurences = defaultdict(list)
        self.val_to_key = {}             # value -> (freq, last_oc, value)
        self.key_to_val = SortedDict()   # (freq, last_oc, value) -> value
        self.keys = SortedList()
        self.i = 0

    def push(self, x: int) -> None:
        """Push ``x``."""
        if x not in self.val_to_key:
            self.val_to_key[x] = (1, self.i, x)
            self.key_to_val[(1, self.i, x)] = x
            self.keys.add((1, self.i, x))
        else:
            oldfreq, oldlast_oc, _ = self.val_to_key[x]
            del self.key_to_val[(oldfreq, oldlast_oc, x)]
            self.keys.remove((oldfreq, oldlast_oc, x))
            self.val_to_key[x] = (oldfreq + 1, self.i, x)
            self.key_to_val[(oldfreq + 1, self.i, x)] = x
            self.keys.add((oldfreq + 1, self.i, x))
        self.occurences[x].append(self.i)
        self.i += 1

    def pop(self) -> int:
        """Remove and return the most frequent value."""
        top = self.keys.pop()
        freq, _, x = top
        # Keep key_to_val in step with keys; previously the popped key
        # stayed behind and the mapping grew without bound.
        self.key_to_val.pop(top, None)
        self.occurences[x].pop()
        if freq > 1:
            new_key = (freq - 1, self.occurences[x][-1], x)
            self.key_to_val[new_key] = x
            self.val_to_key[x] = new_key
            self.keys.add(new_key)
        else:
            del self.val_to_key[x]
        return x
def a_star(starting_node, target_node, paths):
    """
    A Star generic implementation (see
    https://en.wikipedia.org/wiki/A*_search_algorithm for more information).

    Repeatedly expands the first node of a sorted fringe.  The fringe is
    ordered by the nodes' own comparison (presumably real cost plus
    heuristic - defined on the node type, not here).

    :param starting_node: starting node
    :param target_node: target node
    :param paths: paths
    :return: ordered list of nodes leading to our goal, or None when the
        fringe runs empty without reaching it.
    """
    closed = list()        # nodes that have already been expanded
    fringe = SortedList()  # discovered nodes waiting to be expanded
    # add(), not append(): sortedcontainers 2.x raises NotImplementedError
    # from SortedList.append.
    fringe.add(starting_node)
    while True:
        if len(fringe) == 0:
            return None
        node = fringe.pop(0)
        if not any(node.id == it.id for it in closed):
            # print("Current Node= {}, Fringe={}".format(node,fringe)) - uncomment to see how it works!
            if goal_check(node, target_node):
                # Target reached: reconstruct the path.
                return find_solution(node)
            else:
                # Queue every neighbour, then mark this node as expanded.
                connected_nodes = get_connected_nodes(node, paths)
                for connected_node in connected_nodes:
                    fringe.add(connected_node)
                closed.append(node)
        else:
            print("skipped node={}".format(node))
def solve(self):
    """Best-first search from ``self.initial_state_``.

    Nodes are expanded in ascending ``self.cost_function`` order; states
    already marked as visited are skipped.

    Returns:
        The list of game states from the initial state to the first
        finished state, or None (after printing a message) when the
        frontier is exhausted without finding one.
    """
    frontier = SortedList(key=lambda k: self.cost_function(k))
    root = Tree(self.initial_state_)
    frontier.add(root)
    i = 0
    while frontier:
        node = frontier.pop(0)
        game = node.root
        if self.already_visited(game):
            continue
        self.mark_as_visited(game)
        i += 1
        if i % 50000 == 0:
            print("iterations: %d, frontier: %d" % (i, len(frontier)))
        if game.finished():
            # Walk the parent links back to the root, then reverse.
            path = []
            current = node
            while current is not None:
                path.append(current.root)
                current = current.parent
            path.reverse()
            return path
        for s in self.actions(game):
            new_game = game.duplicate()
            self.apply_action(new_game, s)
            new_node = Tree(new_game)
            node.add_child(new_node)
            frontier.add(new_node)
    # Previously the message was printed and pop(0) then crashed with
    # IndexError; report the exhausted search explicitly instead.
    print("FRONTIER EMPTY AFTER %d ITERATIONS" % i)
    return None
def b():
    """Return the time at which the last step finishes with five workers.

    Dependencies are read from the module-level ``lines`` (characters 5
    and 36 of each line name the prerequisite and the dependent step);
    ``w`` gives the duration of a step.
    """
    g = defaultdict(set)    # step -> steps that depend on it
    gr = defaultdict(set)   # step -> steps it still waits for
    for line in lines:
        before, after = line[5], line[36]
        g[before].add(after)
        gr[after].add(before)

    workers = [0, 0, 0, 0, 0]  # time at which each worker becomes free
    s = SortedList((0, n) for n in g.keys() if len(gr[n]) == 0)
    order = SortedList([])

    while s:
        ready_at, n = s.pop(0)
        # First worker with the earliest free time.
        free_at = min(workers)
        slot = workers.index(free_at)
        finish = max(free_at, ready_at) + w(n)
        workers[slot] = finish
        order.add((finish, n))
        for m in list(g[n]):
            g[n].remove(m)
            gr[m].remove(n)
            if len(gr[m]) == 0:
                # All prerequisites done: available once this one finishes.
                s.add((finish, m))
    return order[-1][0]
class DijkstraFixedPoint:
    """Iterate over candidate fixed-point sets of an automaton.

    Candidates are dicts mapping a state to a chosen label.  They are kept
    in a list sorted by descending size, so the smallest candidate is
    visited first.
    """

    def __init__(self, automaton, initial_set, accepted_set):
        self.automaton = automaton
        # Negated length: pop() (from the end) yields the smallest dict.
        self.set_to_visit = SortedList(initial_set, key=lambda d: -len(d))
        self.accepted_set = accepted_set

    def iter_fix_point_set(self, max_size=10):
        """Visit the next candidate and queue its expansions.

        Returns:
            ``(accepted, F)`` where ``F`` is the visited candidate and
            ``accepted`` is True when ``F`` produced no new candidates and
            reaches the accepted set.  ``(False, F)`` when following its
            successors would exceed ``max_size`` states.

        Raises:
            StopIteration: when no candidate is left.
        """
        if len(self.set_to_visit) == 0:
            raise StopIteration()
        F = self.set_to_visit.pop()
        nF = {k: [v] for k, v in F.items()}
        new_size_of_fp = len(nF)
        reach_accepted_set = False
        for u, lu in F.items():
            labelled_edges = self.automaton.get_labelled_successors(u)
            succ = labelled_edges[lu]
            for s in succ:
                if s in self.accepted_set:
                    reach_accepted_set = True
                if (s not in nF) and (s not in self.accepted_set):
                    # New state: every one of its labels is a choice.
                    nF[s] = list(self.automaton.get_successor_labels(s))
                    new_size_of_fp = len(nF)
        if new_size_of_fp > max_size:
            return False, F
        newF = self.expand_successor_set(nF)
        if F in newF:
            newF.remove(F)
        self.set_to_visit.update(newF)
        accept_fix_point = (len(newF) == 0) and reach_accepted_set
        return accept_fix_point, F

    def expand_successor_set(self, nF):
        """Return every dict that picks one label per state of ``nF``."""
        sF = []
        for conf in itertools.product(*nF.values()):
            sF.append({k: v for k, v in zip(nF.keys(), conf)})
        return sF

    def __iter__(self):
        return self

    def next(self):
        """Return the result of visiting the next candidate."""
        return self.iter_fix_point_set()

    # Python 3 iterator protocol; ``next`` is kept for existing callers.
    __next__ = next

    def next_fixed_point(self, max_size):
        """Visit candidates until one is accepted.

        Returns ``(True, fixed_point)``, or ``(False, None)`` when the
        candidates run out first.
        """
        fp_found = 0
        try:
            while fp_found == False:
                fp_found, fp = self.iter_fix_point_set(max_size)
        except StopIteration:
            return False, None
        return fp_found, fp
class Store:
    """Sorted container that keeps only the ``N`` largest items."""

    def __init__(self, N=10):
        self.store = SortedList()
        self.N = N

    def add(self, item):
        """Insert ``item``; drop the smallest item when over capacity."""
        self.store.add(item)
        if len(self.store) > self.N:
            self.store.pop(0)

    def pop(self, i):
        """Remove the item at index ``i`` and return it."""
        # Returning the value makes this behave like the pop() it wraps;
        # it used to be silently discarded.
        return self.store.pop(i)

    def __len__(self):
        return len(self.store)

    def __getitem__(self, i):
        return self.store[i]

    def __str__(self):
        return str(self.store)
def test_pop_indexerror2():
    """Popping the index one past the end must raise IndexError."""
    slt = SortedList(range(10), load=4)
    # The expected error is asserted explicitly; letting it escape would
    # make the test itself error out.
    try:
        slt.pop(10)
    except IndexError:
        pass
    else:
        raise AssertionError("pop(10) on a 10-element list did not raise IndexError")
def test_pop_indexerror2():
    """Popping the index one past the end raises IndexError."""
    values = SortedList(range(10))
    values._reset(4)
    out_of_range = 10
    with pytest.raises(IndexError):
        values.pop(out_of_range)
def test_pop_indexerror3():
    """Popping from an empty list raises IndexError."""
    empty = SortedList()
    with pytest.raises(IndexError):
        empty.pop()
def test_pop_indexerror3():
    """Popping from an empty list must raise IndexError."""
    slt = SortedList()
    # The expected error is asserted explicitly; letting it escape would
    # make the test itself error out.
    try:
        slt.pop()
    except IndexError:
        pass
    else:
        raise AssertionError("pop() on an empty list did not raise IndexError")
class PriorityDict(MutableMapping): """ A PriorityDict provides the same methods as a dict. Additionally, a PriorityDict efficiently maintains its keys in value sorted order. Consequently, the keys method will return the keys in value sorted order, the popitem method will remove the item with the highest value, etc. """ def __init__(self, *args, **kwargs): """ A PriorityDict provides the same methods as a dict. Additionally, a PriorityDict efficiently maintains its keys in value sorted order. Consequently, the keys method will return the keys in value sorted order, the popitem method will remove the item with the highest value, etc. If the first argument is the boolean value False, then it indicates that keys are not comparable. By default this setting is True and duplicate values are tie-breaked on the key. Using comparable keys improves the performance of the PriorityDict. An optional *iterable* argument provides an initial series of items to populate the PriorityDict. Each item in the sequence must itself contain two items. The first is used as a key in the new dictionary, and the second as the key's value. If a given key is seen more than once, the last value associated with it is retained in the new dictionary. If keyword arguments are given, the keywords themselves with their associated values are added as items to the dictionary. If a key is specified both in the positional argument and as a keyword argument, the value associated with the keyword is retained in the dictionary. For example, these all return a dictionary equal to ``{"one": 2, "two": 3}``: * ``SortedDict(one=2, two=3)`` * ``SortedDict({'one': 2, 'two': 3})`` * ``SortedDict(zip(('one', 'two'), (2, 3)))`` * ``SortedDict([['two', 3], ['one', 2]])`` The first example only works for keys that are valid Python identifiers; the others work with any valid keys. Note that this constructor mimics the Python dict constructor. 
If you're looking for a constructor like collections.Counter(...), see PriorityDict.count(...). """ self._dict = dict() if len(args) > 0 and isinstance(args[0], bool): if args[0]: self._list = SortedList() else: self._list = SortedListWithKey(key=lambda tup: tup[0]) else: self._list = SortedList() self.iloc = _IlocWrapper(self) self.update(*args, **kwargs) def clear(self): """Remove all elements from the dictionary.""" self._dict.clear() self._list.clear() def clean(self, value=0): """ Remove all items with value less than or equal to `value`. Default `value` is 0. """ _list, _dict = self._list, self._dict pos = self.bisect_right(value) for key in (key for value, key in _list[:pos]): del _dict[key] del _list[:pos] def __contains__(self, key): """Return True if and only if *key* is in the dictionary.""" return key in self._dict def __delitem__(self, key): """ Remove ``d[key]`` from *d*. Raises a KeyError if *key* is not in the dictionary. """ value = self._dict[key] self._list.remove((value, key)) del self._dict[key] def __getitem__(self, key): """ Return the priority of *key* in *d*. Raises a KeyError if *key* is not in the dictionary. """ return self._dict[key] def __iter__(self): """ Create an iterator over the keys of the dictionary ordered by the value sort order. """ return iter(key for value, key in self._list) def __reversed__(self): """ Create an iterator over the keys of the dictionary ordered by the reversed value sort order. 
""" return iter(key for value, key in reversed(self._list)) def __len__(self): """Return the number of (key, value) pairs in the dictionary.""" return len(self._dict) def __setitem__(self, key, value): """Set `d[key]` to *value*.""" if key in self._dict: old_value = self._dict[key] self._list.remove((old_value, key)) self._list.add((value, key)) self._dict[key] = value def copy(self): """Create a shallow copy of the dictionary.""" result = PriorityDict() result._dict = self._dict.copy() result._list = self._list.copy() result.iloc = _IlocWrapper(result) return result def __copy__(self): """Create a shallow copy of the dictionary.""" return self.copy() @classmethod def fromkeys(cls, iterable, value=0): """ Create a new dictionary with keys from `iterable` and values set to `value`. The default *value* is 0. """ return PriorityDict((key, value) for key in iterable) def get(self, key, default=None): """ Return the value for *key* if *key* is in the dictionary, else *default*. If *default* is not given, it defaults to ``None``, so that this method never raises a KeyError. """ return self._dict.get(key, default) def has_key(self, key): """Return True if and only in *key* is in the dictionary.""" return key in self._dict def pop(self, key, default=_NotGiven): """ If *key* is in the dictionary, remove it and return its value, else return *default*. If *default* is not given and *key* is not in the dictionary, a KeyError is raised. """ if key in self._dict: value = self._dict[key] self._list.remove((value, key)) return self._dict.pop(key) else: if default == _NotGiven: raise KeyError else: return default def popitem(self, index=-1): """ Remove and return item at *index* (default: -1). Raises IndexError if dict is empty or index is out of range. Negative indices are supported as for slice indices. """ value, key = self._list.pop(index) del self._dict[key] return key, value def setdefault(self, key, default=0): """ If *key* is in the dictionary, return its value. 
If not, insert *key* with a value of *default* and return *default*. *default* defaults to ``0``. """ if key in self._dict: return self._dict[key] else: self._dict[key] = default self._list.add((default, key)) return default def elements(self): """ Return an iterator over elements repeating each as many times as its count. Elements are returned in value sort-order. If an element’s count is less than one, elements() will ignore it. """ values = (repeat(key, value) for value, key in self._list) return chain.from_iterable(values) def most_common(self, count=None): """ Return a list of the `count` highest priority elements with their priority. If `count` is not specified, `most_common` returns *all* elements in the dict. Elements with equal counts are ordered by key. """ _list, _dict = self._list, self._dict if count is None: return [(key, value) for value, key in reversed(_list)] end = len(_dict) start = end - count return [(key, value) for value, key in reversed(_list[start:end])] def subtract(self, elements): """ Elements are subtracted from an iterable or from another mapping (or counter). Like dict.update() but subtracts counts instead of replacing them. Both inputs and outputs may be zero or negative. """ self -= Counter(elements) def tally(self, *args, **kwargs): """ Elements are counted from an iterable or added-in from another mapping (or counter). Like dict.update() but adds counts instead of replacing them. Also, the iterable is expected to be a sequence of elements, not a sequence of (key, value) pairs. """ self += Counter(*args, **kwargs) @classmethod def count(self, *args, **kwargs): """ Consume `args` and `kwargs` with a Counter and use that mapping to initialize a PriorityDict. """ return PriorityDict(Counter(*args, **kwargs)) def update(self, *args, **kwargs): """ Update the dictionary with the key/value pairs from *other*, overwriting existing keys. 
*update* accepts either another dictionary object or an iterable of key/value pairs (as a tuple or other iterable of length two). If keyword arguments are specified, the dictionary is then updated with those key/value pairs: ``d.update(red=1, blue=2)``. """ _list, _dict = self._list, self._dict if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping): items = args[0] else: items = dict(*args, **kwargs) if (10 * len(items)) > len(_dict): _dict.update(items) _list.clear() _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(items): old_value = _dict[key] _list.remove((old_value, key)) _dict[key] = value _list.add((value, key)) def index(self, key): """ Return the smallest *i* such that `d.iloc[i] == key`. Raises KeyError if *key* is not present. """ value = self._dict[key] return self._list.index((value, key)) def bisect_left(self, value): """ Similar to the ``bisect`` module in the standard library, this returns an appropriate index to insert *value* in PriorityDict. If *value* is already present in PriorityDict, the insertion point will be before (to the left of) any existing entries. """ return self._list.bisect_left((value,)) def bisect(self, value): """Same as bisect_left.""" return self._list.bisect((value,)) def bisect_right(self, value): """ Same as `bisect_left`, but if *value* is already present in PriorityDict, the insertion point will be after (to the right of) any existing entries. 
""" return self._list.bisect_right((value, _Biggest)) def __iadd__(self, that): """Add values from `that` mapping.""" _list, _dict = self._list, self._dict if len(_dict) == 0: _dict.update(that) _list.update((value, key) for key, value in iteritems(_dict)) elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: _dict[key] += value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value + value _dict[key] = value _list.add((value, key)) return self def __isub__(self, that): """Subtract values from `that` mapping.""" _list, _dict = self._list, self._dict if len(_dict) == 0: _dict.clear() _list.clear() elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: _dict[key] -= value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value - value _dict[key] = value _list.add((value, key)) return self def __ior__(self, that): """Or values from `that` mapping (max(v1, v2)).""" _list, _dict = self._list, self._dict if len(_dict) == 0: _dict.update(that) _list.update((value, key) for key, value in iteritems(_dict)) elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value > value else value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value if old_value > value else value _dict[key] = value _list.add((value, key)) return self def __iand__(self, that): """And values from `that` mapping (min(v1, v2)).""" _list, _dict = self._list, self._dict if 
len(_dict) == 0: _dict.clear() _list.clear() elif len(that) * 3 > len(_dict): _list.clear() for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value < value else value _list.update((value, key) for key, value in iteritems(_dict)) else: for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _list.remove((old_value, key)) value = old_value if old_value < value else value _dict[key] = value _list.add((value, key)) return self def __add__(self, that): """Add values from this and `that` mapping.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: _dict[key] += value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) return result def __sub__(self, that): """Subtract values in `that` mapping from this.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: _dict[key] -= value _list.update((value, key) for key, value in iteritems(_dict)) return result def __or__(self, that): """Or values from this and `that` mapping.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value > value else value else: _dict[key] = value _list.update((value, key) for key, value in iteritems(_dict)) return result def __and__(self, that): """And values from this and `that` mapping.""" result = PriorityDict() _list, _dict = result._list, result._dict _dict.update(self._dict) for key, value in iteritems(that): if key in _dict: old_value = _dict[key] _dict[key] = old_value if old_value < value else value _list.update((value, key) for key, value in iteritems(_dict)) return result def __eq__(self, that): """Compare two mappings for equality.""" if isinstance(that, 
PriorityDict): that = that._dict return self._dict == that def __ne__(self, that): """Compare two mappings for inequality.""" if isinstance(that, PriorityDict): that = that._dict return self._dict != that def __lt__(self, that): """Compare two mappings for less than.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (_dict != that and self <= that) def __le__(self, that): """Compare two mappings for less than equal.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (len(_dict) <= len(that) and all(_dict[key] <= that[key] if key in that else False for key in _dict)) def __gt__(self, that): """Compare two mappings for greater than.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (_dict != that and self >= that) def __ge__(self, that): """Compare two mappings for greater than equal.""" if isinstance(that, PriorityDict): that = that._dict _dict = self._dict return (len(_dict) >= len(that) and all(_dict[key] >= that[key] if key in _dict else False for key in that)) def isdisjoint(self, that): """ Return True if no key in `self` is also in `that`. This doesn't check that the value is greater than zero. To remove keys with value less than or equal to zero see *clean*. """ return not any(key in self for key in that) def items(self): """ Return a list of the dictionary's items (``(key, value)`` pairs). Items are ordered by their value from least to greatest. """ return list((key, value) for value, key in self._list) def iteritems(self): """ Return an iterable over the items (``(key, value)`` pairs) of the dictionary. Items are ordered by their value from least to greatest. """ return iter((key, value) for value, key in self._list) @not26 def viewitems(self): """ In Python 2.7 and later, return a new `ItemsView` of the dictionary's items. Beware iterating the `ItemsView` as items are unordered. In Python 2.6, raise a NotImplementedError. 
""" if hexversion < 0x03000000: return self._dict.viewitems() else: return self._dict.items() def keys(self): """ Return a list of the dictionary's keys. Keys are ordered by their corresponding value from least to greatest. """ return list(key for value, key in self._list) def iterkeys(self): """ Return an iterable over the keys of the dictionary. Keys are ordered by their corresponding value from least to greatest. """ return iter(key for value, key in self._list) @not26 def viewkeys(self): """ In Python 2.7 and later, return a new `KeysView` of the dictionary's keys. Beware iterating the `KeysView` as keys are unordered. In Python 2.6, raise a NotImplementedError. """ if hexversion < 0x03000000: return self._dict.viewkeys() else: return self._dict.keys() def values(self): """ Return a list of the dictionary's values. Values are ordered from least to greatest. """ return list(value for value, key in self._list) def itervalues(self): """ Return an iterable over the values of the dictionary. Values are iterated from least to greatest. """ return iter(value for value, key in self._list) @not26 def viewvalues(self): """ In Python 2.7 and later, return a `ValuesView` of the dictionary's values. Beware iterating the `ValuesView` as values are unordered. In Python 2.6, raise a NotImplementedError. """ if hexversion < 0x03000000: return self._dict.viewvalues() else: return self._dict.values() def __repr__(self): """Return a string representation of PriorityDict.""" return 'PriorityDict({0})'.format(repr(dict(self))) def _check(self): self._list._check() assert len(self._dict) == len(self._list) assert all(key in self._dict and self._dict[key] == value for value, key in self._list)
def group_files_by_size_fast(fileslist, nbgroups, mode=1): # pragma: no cover
    '''Given a files list with sizes, output a list where the files are grouped in nbgroups per cluster.

    Parameters:
    - fileslist: mapping of file name to file size.
    - nbgroups: number of groups per cluster.
    - mode: with 0, a new cluster gets its first file and nbgroups-1 empty
      groups, each with the first file's size left to fill. With any other
      value, the remaining groups of a new cluster are filled right away with
      the next biggest files, and only the size differences are left to fill.

    Returns a list of clusters; each cluster is a list of groups; each group
    is a list of file names.

    Pseudo-code for algorithm in O(n log(g)) (thanks to insertion sort or binary search trees)
    See for more infos: http://cs.stackexchange.com/questions/44406/fast-algorithm-for-clustering-groups-of-elements-given-their-size-time/44614#44614
    For each file:
        - If to-fill list is empty or file.size > first-key(to-fill):
          * Create cluster c with file in first group g1
          * Add to-fill[file.size].append([c, g2], [c, g3], ..., [c, gn])
        - Else:
          * ksize = first-key(to-fill)
          * c, g = to-fill[ksize].popitem(0)
          * Add file to cluster c in group g
          * nsize = ksize - file.size
          * if nsize > 0:
            . to-fill[nsize].append([c, g])
            . sort to-fill if not an automatic ordering structure
    '''
    # Remaining sizes to fill, kept sorted so the biggest one is at the end.
    ftofill = SortedList()
    # Remaining size -> list of (cluster index, group index) with that much room.
    ftofill_pointer = {}
    fgrouped = []
    # .items() rather than .iteritems() so this runs on Python 2 and Python 3.
    ford = sorted(fileslist.items(), key=lambda x: x[1])
    last_cid = -1
    while ford:
        # Always take the biggest remaining file.
        fname, fsize = ford.pop()
        if not ftofill or fsize > ftofill[-1]:
            # Branch A: no group has enough room, so open a new cluster.
            last_cid += 1
            cluster = [[fname]]
            fgrouped.append(cluster)
            if mode == 0:
                for g in range(nbgroups - 1, 0, -1):
                    cluster.append([])
                    ftofill_pointer.setdefault(fsize, []).append((last_cid, g))
                    ftofill.add(fsize)
            else:
                for g in range(1, nbgroups):
                    if not ford:
                        # No file left to start the remaining groups with.
                        break
                    fgname, fgsize = ford.pop()
                    cluster.append([fgname])
                    diff_size = fsize - fgsize
                    if diff_size > 0:
                        ftofill_pointer.setdefault(diff_size, []).append((last_cid, g))
                        ftofill.add(diff_size)
        else:
            # Branch B: put the file in the group with the most room left.
            ksize = ftofill.pop()
            c, g = ftofill_pointer[ksize].pop()
            fgrouped[c][g].append(fname)
            nsize = ksize - fsize
            if nsize > 0:
                ftofill_pointer.setdefault(nsize, []).append((c, g))
                ftofill.add(nsize)
    return fgrouped