def solve(n, k):
    """Run ``k`` rounds of "take the largest key and halve it".

    A ``FastRBTree`` maps a key to how many copies of that key are pending.
    Each round removes one copy of the largest key, derives the two halves
    that surround the removed middle element, and hands both halves to
    ``update_tree`` (defined elsewhere; presumably it increments the count
    stored for that key -- confirm against its definition).

    Args:
        n: Initial key; it is inserted with a count of 1.
        k: Number of rounds to run.

    Returns:
        ``"<ls> <rs>"`` for the halves produced by the last round, or
        ``"<n> <n>"`` when ``k`` is 0.
    """
    gaps = FastRBTree()
    gaps.insert(n, 1)
    ls = rs = n
    for _ in range(k):
        widest, copies = gaps.max_item()
        # Consume exactly one copy of the largest key.
        gaps.remove(widest)
        if copies > 1:
            gaps.insert(widest, copies - 1)
        half = widest // 2
        ls = half
        # An odd key splits evenly; an even key leaves one side shorter.
        rs = half if widest % 2 == 1 else half - 1
        update_tree(gaps, ls)
        update_tree(gaps, rs)
    return " ".join((str(ls), str(rs)))
class Tdigest(object):
    """Quantile sketch that keeps weighted centroids in an ``RBTree``.

    The tree is keyed by the mean a centroid had when it was inserted. Every
    centroid exposes ``mean``, ``n`` (its weight) and ``cumn`` (a cached
    running weight that ``_cumulate`` refreshes).

    Args:
        delta: Compression factor used when sizing centroids in ``_digest``.
        K: A compression pass runs once the number of centroids exceeds
            ``K / delta``; a falsy value disables automatic compression.
        CX: Growth ratio of ``n`` tolerated before a non-exact ``_cumulate``
            refreshes the cached ``cumn`` values; falsy means always refresh.
    """

    def __init__(self, delta=0.01, K=25, CX=1.1):
        self.delta = delta
        self.K = K
        self.CX = CX
        self.centroids = RBTree()
        self.nreset = 0
        self.reset()

    def reset(self):
        """Drop every centroid and zero the counters (``nreset`` is bumped)."""
        self.centroids.clear()
        self.n = 0
        self.nreset += 1
        self.last_cumulate = 0
        self.compressing = False

    def push(self, x, n=1):
        """Add a value, or a list of values, each with weight ``n``."""
        if not isinstance(x, list):
            x = [x]
        for item in x:
            self._digest(item, n)

    def percentile(self, p):
        """Return the interpolated value at fraction ``p`` of the total weight.

        Returns ``None`` when the digest holds no centroids.
        """
        if self.size() == 0:
            return None
        self._cumulate(True)
        cumn = self.n * p
        lower = self.centroids.min_item()[1]
        upper = self.centroids.max_item()[1]
        # Bracket the target weight between two neighbouring centroids.
        for c in self.centroids.values():
            if c.cumn <= cumn:
                lower = c
            else:
                upper = c
                break
        if lower == upper:
            return lower.mean
        return lower.mean + (cumn - lower.cumn) * (upper.mean - lower.mean) / \
            (upper.cumn - lower.cumn)

    def serialize(self):
        """Encode the digest as ``delta~K~size~mean...~count...``.

        ``CX`` is not part of the encoding. An empty digest serializes to
        ``delta~K~0~``.
        """
        result = '%s~%s~%s~' % (self.delta, self.K, self.size())
        if self.size() == 0:
            return result
        self._cumulate(True)
        means = []
        counts = []
        for c in self.centroids.values():
            means.append(str(c.mean))
            counts.append(str(c.n))
        return '%s%s~%s' % (result, '~'.join(means), '~'.join(counts))

    @classmethod
    def deserialize(cls, serialized_str):
        """Rebuild a digest from the text produced by ``serialize``.

        Raises:
            Exception: If ``serialized_str`` is not a string.
        """
        # ``basestring`` only exists on Python 2; fall back to ``str`` so the
        # type check also works on Python 3.
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str
        if not isinstance(serialized_str, string_types):
            raise Exception(u'serialized_str must be str')
        data = serialized_str.split('~')
        # Build through ``cls`` so a subclass gets an instance of itself.
        t = cls(delta=float(data[0]), K=int(data[1]))
        size = int(data[2])
        # Layout after the three header fields: ``size`` means, then
        # ``size`` counts.
        for i in range(size):
            t.push(float(data[i + 3]), int(data[size + i + 3]))
        t._cumulate(True)
        return t

    def _digest(self, x, n):
        """Fold one weighted point into the nearest centroid or start a new one."""
        if self.size() == 0:
            self._new_centroid(x, n, 0)
        else:
            _min = self.centroids.min_item()[1]
            _max = self.centroids.max_item()[1]
            nearest = self.find_nearest(x)
            if nearest and nearest.mean == x:
                self._addweight(nearest, x, n)
            elif nearest == _min:
                # The extreme centroids are never grown by a different value.
                self._new_centroid(x, n, 0)
            elif nearest == _max:
                self._new_centroid(x, n, self.n)
            else:
                p = (nearest.cumn + nearest.n / 2.0) / self.n
                max_n = int(4 * self.n * self.delta * p * (1 - p))
                if max_n >= nearest.n + n:
                    self._addweight(nearest, x, n)
                else:
                    self._new_centroid(x, n, nearest.cumn)
        self._cumulate(False)
        if self.K and self.size() > self.K / self.delta:
            self.compress()

    def find_nearest(self, x):
        """Return the centroid whose tree key is closest to ``x``.

        Returns ``None`` for an empty digest. On a distance tie the ceiling
        side wins.
        """
        if self.size() == 0:
            return None
        try:
            lower = self.centroids.ceiling_item(x)[1]
        except KeyError:
            lower = None
        if lower and lower.mean == x:
            return lower
        try:
            prev = self.centroids.floor_item(x)[1]
        except KeyError:
            prev = None
        if not lower:
            return prev
        if not prev:
            return lower
        if abs(prev.mean - x) < abs(lower.mean - x):
            return prev
        else:
            return lower

    def size(self):
        """Return the number of centroids currently stored."""
        return len(self.centroids)

    def compress(self):
        """Re-insert every centroid in random order to shrink the digest.

        The ``compressing`` flag makes the call a no-op while a pass is
        already running, because ``push`` can trigger ``compress`` again.
        """
        if self.compressing:
            return
        points = self.toList()
        self.reset()
        self.compressing = True
        # Sorting on a random key is used as a shuffle here.
        for point in sorted(points, key=lambda x: random()):
            self.push(point['mean'], point['n'])
        self._cumulate(True)
        self.compressing = False

    def _cumulate(self, exact):
        """Refresh each centroid's ``cumn`` and resynchronise ``n``.

        With ``exact`` false the refresh is skipped while ``n`` has grown by
        less than the factor ``CX`` since the previous refresh.
        """
        if self.n == self.last_cumulate:
            return
        # float() forces true division: on Python 2 an int/int ratio would
        # floor to 1 and postpone the refresh until ``n`` had doubled.
        if not exact and self.CX and self.last_cumulate and \
                self.CX > (float(self.n) / self.last_cumulate):
            return
        cumn = 0
        for c in self.centroids.values():
            cumn = c.cumn = cumn + c.n
        self.n = self.last_cumulate = cumn

    def toList(self):
        """Return the centroids as a list of ``mean``/``n``/``cumn`` dicts."""
        return [dict(mean=c.mean, n=c.n, cumn=c.cumn)
                for c in self.centroids.values()]

    def _addweight(self, nearest, x, n):
        """Merge weight ``n`` at value ``x`` into the centroid ``nearest``."""
        if x != nearest.mean:
            # float() keeps the weighted-mean update from being truncated by
            # Python 2 integer division when every operand is an int.
            nearest.mean += n * (x - nearest.mean) / float(nearest.n + n)
        nearest.cumn += n
        nearest.n += n
        self.n += n

    def _new_centroid(self, x, n, cumn):
        """Insert and return a new centroid keyed by ``x``."""
        c = Centroid(x, n, cumn)
        self.centroids.insert(x, c)
        self.n += n
        return c
class TDigest(object):
    """t-digest sketch whose centroids live in an ``RBTree`` keyed by mean.

    Centroids expose ``mean``, ``count`` and ``update(x, w)``.

    Args:
        delta: Compression factor used by the centroid size threshold.
        K: ``compress`` runs once the centroid count exceeds ``K / delta``.
    """

    def __init__(self, delta=0.01, K=25):
        self.C = RBTree()
        self.n = 0
        self.delta = delta
        self.K = K

    def __add__(self, other_digest):
        """Return a new digest fed with the centroids of both operands."""
        C1 = list(self.C.values())
        C2 = list(other_digest.C.values())
        shuffle(C1)
        shuffle(C2)
        data = C1 + C2
        new_digest = TDigest(self.delta, self.K)
        for c in data:
            new_digest.update(c.mean, c.count)
        return new_digest

    def __len__(self):
        """Return the number of centroids."""
        return len(self.C)

    def __repr__(self):
        return """<T-Digest: n=%d, centroids=%d>""" % (self.n, len(self))

    def _add_centroid(self, centroid):
        """Insert ``centroid``, merging it into an existing one with the same mean."""
        if centroid.mean not in self.C:
            self.C.insert(centroid.mean, centroid)
        else:
            self.C[centroid.mean].update(centroid.mean, centroid.count)

    def _compute_centroid_quantile(self, centroid):
        """Return the quantile at the middle of ``centroid``.

        The weight summed from ``value_slice(-inf, centroid.mean)`` plus half
        of the centroid's own count is divided by ``n``.
        """
        denom = self.n
        cumulative_sum = sum(
            c_i.count
            for c_i in self.C.value_slice(-float('Inf'), centroid.mean))
        return (centroid.count / 2. + cumulative_sum) / denom

    def _update_centroid(self, centroid, x, w):
        """Re-key ``centroid`` after folding weight ``w`` at ``x`` into it."""
        # The mean is the tree key, so remove before mutating and re-insert.
        self.C.pop(centroid.mean)
        centroid.update(x, w)
        self._add_centroid(centroid)

    def _find_closest_centroids(self, x):
        """Return the centroid(s) nearest to ``x`` (two on an exact tie)."""
        try:
            ceil_key = self.C.ceiling_key(x)
        except KeyError:
            floor_key = self.C.floor_key(x)
            return [self.C[floor_key]]
        try:
            floor_key = self.C.floor_key(x)
        except KeyError:
            ceil_key = self.C.ceiling_key(x)
            return [self.C[ceil_key]]
        if abs(floor_key - x) < abs(ceil_key - x):
            return [self.C[floor_key]]
        elif abs(floor_key - x) == abs(ceil_key - x) and (ceil_key != floor_key):
            return [self.C[ceil_key], self.C[floor_key]]
        else:
            return [self.C[ceil_key]]

    def _theshold(self, q):
        """Return the largest count a centroid at quantile ``q`` may hold."""
        return 4 * self.n * self.delta * q * (1 - q)

    def update(self, x, w=1):
        """
        Update the t-digest with value x and weight w.
        """
        self.n += w
        if len(self) == 0:
            self._add_centroid(Centroid(x, w))
            return
        S = self._find_closest_centroids(x)
        while len(S) != 0 and w > 0:
            j = choice(list(range(len(S))))
            c_j = S[j]
            q = self._compute_centroid_quantile(c_j)
            # This filters out the centroids that do not satisfy the second
            # part of the definition of S. See the original paper by Dunning.
            if c_j.count + w > self._theshold(q):
                S.pop(j)
                continue
            delta_w = min(self._theshold(q) - c_j.count, w)
            self._update_centroid(c_j, x, delta_w)
            w -= delta_w
            S.pop(j)
        if w > 0:
            # Whatever weight could not be absorbed becomes a new centroid.
            self._add_centroid(Centroid(x, w))
        if len(self) > self.K / self.delta:
            self.compress()
        return

    def batch_update(self, values, w=1):
        """
        Update the t-digest with an iterable of values. This assumes all
        points have the same weight.
        """
        for x in values:
            self.update(x, w)
        self.compress()
        return

    def compress(self):
        """Rebuild the centroid tree by re-inserting centroids in random order."""
        T = TDigest(self.delta, self.K)
        C = list(self.C.values())
        shuffle(C)
        for c_i in C:
            T.update(c_i.mean, c_i.count)
        self.C = T.C

    def percentile(self, q):
        """
        Computes the percentile of a specific value in [0,1], ie. computes
        F^{-1}(q) where F^{-1} denotes the inverse CDF of the distribution.

        Raises ValueError when q is outside [0, 1].
        """
        if not (0 <= q <= 1):
            raise ValueError("q must be between 0 and 1, inclusive.")
        t = 0
        q *= self.n
        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            k = c_i.count
            if q < t + k:
                if i == 0:
                    return c_i.mean
                elif i == len(self) - 1:
                    return c_i.mean
                else:
                    delta = (self.C.succ_item(key)[1].mean -
                             self.C.prev_item(key)[1].mean) / 2.
                return c_i.mean + ((q - t) / k - 0.5) * delta
            t += k
        return self.C.max_item()[1].mean

    def quantile(self, q):
        """
        Computes the quantile of a specific value, ie. computes F(q) where F
        denotes the CDF of the distribution.
        """
        t = 0
        N = float(self.n)
        if len(self) == 1:
            # A lone centroid has no neighbour to interpolate against; asking
            # the tree for one would raise KeyError. Treat it as a step.
            return int(q >= self.C.min_key())
        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            if i == len(self) - 1:
                delta = (c_i.mean - self.C.prev_item(key)[1].mean) / 2.
            else:
                delta = (self.C.succ_item(key)[1].mean - c_i.mean) / 2.
            z = max(-1, (q - c_i.mean) / delta)
            if z < 1:
                return t / N + c_i.count / N * (z + 1) / 2
            t += c_i.count
        return 1

    def trimmed_mean(self, q1, q2):
        """
        Computes the mean of the distribution between the two percentiles q1
        and q2. This is a modified version of the algorithm presented in the
        original t-Digest paper.

        Raises ValueError unless q1 < q2.
        """
        if not (q1 < q2):
            # The message now states the condition that is actually checked.
            raise ValueError("q1 must be less than q2.")
        s = k = t = 0
        q1 *= self.n
        q2 *= self.n
        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            k_i = c_i.count
            if q1 < t + k_i:
                if i == 0:
                    delta = self.C.succ_item(key)[1].mean - c_i.mean
                elif i == len(self) - 1:
                    delta = c_i.mean - self.C.prev_item(key)[1].mean
                else:
                    delta = (self.C.succ_item(key)[1].mean -
                             self.C.prev_item(key)[1].mean) / 2.
                nu = ((q1 - t) / k_i - 0.5) * delta
                s += nu * k_i * c_i.mean
                k += nu * k_i
            if q2 < t + k_i:
                return s/k
            t += k_i
        return s/k
# ``sys`` is used below; the import is added here because the script does not
# show one of its own.
import sys

from bintrees import FastRBTree

# Number of test cases, read from the first line of stdin.
T = int(sys.stdin.readline().strip())


def split(n):
    """Return the two run lengths left after one slot of an ``n``-slot run is taken.

    The result is ``(smaller, larger)``; the two differ by at most one.
    """
    n -= 1
    return n // 2, n // 2 + (n % 2)


# Import-time sanity checks for ``split``.
assert (0, 0) == split(1)
assert (0, 1) == split(2)
assert (1, 1) == split(3)

for t in range(1, T + 1):
    N, K = map(int, sys.stdin.readline().strip().split())
    # Maps a run length to how many runs of that length currently exist.
    holes = FastRBTree({N: 1})
    # NOTE(review): ``size`` is only bound inside this loop, so the code
    # relies on K >= 1 for every case -- confirm against the input contract.
    while K > 0:
        size, count = holes.max_item()
        del holes[size]
        # Every run of the largest length is split in the same step.
        for s in split(size):
            holes[s] = holes.get(s, 0) + count
        K -= count
    minD, maxD = split(size)
    print("Case #%d: %d %d" % (t, maxD, minD))
class SparseArray(object):
    """Sparse sequence stored as runs of consecutive items.

    ``self.tree`` maps the absolute index of a run's first element to the
    list of items in that run. Indices that fall in no run read as ``None``.
    """

    def __init__(self):
        self.tree = FastRBTree()

    def __len__(self):
        """Return one past the highest stored index, or 0 when empty."""
        try:
            k, v = self.tree.max_item()
        except KeyError:
            return 0
        return k + len(v)

    def __getitem__(self, ndx):
        """Return the item stored at ``ndx``, or ``None`` if nothing is there."""
        try:
            base, chunk = self.tree.floor_item(ndx)
        except KeyError:
            return None
        offset = ndx - base
        if offset < len(chunk):
            return chunk[offset]
        else:
            return None

    def __setitem__(self, ndx, item):
        """Store ``item`` at ``ndx``, extending or merging adjacent runs."""
        try:
            base, chunk = self.tree.floor_item(ndx)
        except KeyError:
            # No run starts at or before ndx; look at the first run after it.
            try:
                base, chunk = self.tree.ceiling_item(ndx)
            except KeyError:
                self.tree[ndx] = [item]
                return
            if ndx + 1 == base:
                # ndx sits directly in front of that run: prepend and re-key.
                chunk.insert(0, item)
                del self.tree[base]
                self.tree[ndx] = chunk
                return
        if base > ndx:
            self.tree[ndx] = [item]
            return
        offset = ndx - base
        if offset < len(chunk):
            chunk[offset] = item
            return
        try:
            nextbase, nextchunk = self.tree.succ_item(base)
        except KeyError:
            nextbase, nextchunk = None, None
        # ``nextbase`` is an absolute index, so adjacency must be tested
        # with ``ndx``. The run-relative ``offset`` only matches it when
        # ``base`` is 0, which left adjacent runs unmerged everywhere else.
        if offset == len(chunk):
            chunk.append(item)
            if ndx + 1 == nextbase:
                # The gap between the two runs is now closed: merge them.
                chunk += nextchunk
                del self.tree[nextbase]
        elif ndx + 1 == nextbase:
            nextchunk.insert(0, item)
            del self.tree[nextbase]
            self.tree[ndx] = nextchunk
        else:
            self.tree[ndx] = [item]

    def __delitem__(self, ndx):
        """Remove the item at ``ndx``, splitting its run when necessary.

        Does nothing when ``ndx`` lies past the end of the preceding run.
        Raises ``KeyError`` when no run starts at or before ``ndx``.
        """
        base, chunk = self.tree.floor_item(ndx)
        offset = ndx - base
        if offset < len(chunk):
            before = chunk[:offset]
            after = chunk[offset + 1:]
            if len(before):
                self.tree[base] = before
            else:
                del self.tree[base]
            if len(after):
                self.tree[ndx + 1] = after

    def items(self):
        """Yield ``(index, item)`` for every stored item."""
        for k, vs in self.tree.items():
            for n, v in enumerate(vs):
                yield (k + n, v)

    def runs(self):
        """Return the underlying ``(start_index, items)`` pairs."""
        return self.tree.items()

    def run_count(self):
        """Return the number of runs currently stored."""
        return len(self.tree)

    def __repr__(self):
        arep = []
        for k, v in self.tree.items():
            arep.append('[%r]=%s' % (k, ', '.join([repr(item) for item in v])))
        return 'SparseArray(%s)' % ', '.join(arep)
class ExclusiveRangeDict(object):
    """A class like dict whose key is a range [begin, end) of integers.

    It has an attribute for each range of integers, for example:
    [10, 20) => Attribute(0),
    [20, 40) => Attribute(1),
    [40, 50) => Attribute(2),
    ...

    An instance of this class is accessed only via iter_range(begin, end).
    The instance is accessed as follows:

    1) If the given range [begin, end) is not covered by the instance,
    the range is newly created and iterated.

    2) If the given range [begin, end) exactly covers ranges in the instance,
    the ranges are iterated.
    (See test_set() in tests/range_dict_tests.py.)

    3) If the given range [begin, end) starts at and/or ends at a mid-point of
    an existing range, the existing range is split by the given range, and
    ranges in the given range are iterated. For example, consider a case that
    [25, 45) is given to an instance of [20, 30), [30, 40), [40, 50). In this
    case, [20, 30) is split into [20, 25) and [25, 30), and [40, 50) into
    [40, 45) and [45, 50). Then, [25, 30), [30, 40), [40, 45) are iterated.
    (See test_split() in tests/range_dict_tests.py.)

    4) If the given range [begin, end) includes non-existing ranges in an
    instance, the gaps are filled with new ranges, and all ranges are
    iterated. For example, consider a case that [25, 50) is given to an
    instance of [30, 35) and [40, 45). In this case, [25, 30), [35, 40) and
    [45, 50) are created in the instance, and then [25, 30), [30, 35),
    [35, 40), [40, 45) and [45, 50) are iterated.
    (See test_fill() in tests/range_dict_tests.py.)
    """

    class RangeAttribute(object):
        """Default, stateless attribute attached to each range."""

        def __init__(self):
            pass

        def __str__(self):
            return '<RangeAttribute>'

        def __repr__(self):
            return '<RangeAttribute>'

        def copy(self):  # pylint: disable=R0201
            return ExclusiveRangeDict.RangeAttribute()

    def __init__(self, attr=RangeAttribute):
        # Maps a range's begin to the tuple (end, attribute).
        self._tree = FastRBTree()
        # Zero-argument factory that builds the attribute of a new range.
        self._attr = attr

    def iter_range(self, begin=None, end=None):
        """Yield ``(begin, end, attribute)`` for each range in [begin, end).

        Existing ranges are split at the two borders and uncovered gaps are
        filled with fresh ranges before anything is yielded.

        Args:
            begin: Inclusive lower bound; ``None`` means the smallest
                existing begin.
            end: Exclusive upper bound; ``None`` means the end of the last
                existing range.
        """
        # Test against None explicitly: 0 is a legitimate bound and must
        # not be mistaken for "not given".
        if begin is None:
            begin = self._tree.min_key()
        if end is None:
            end = self._tree.max_item()[1][0]

        # The code below assumes self._tree has at least one element.
        if self._tree.is_empty():
            self._tree[begin] = (end, self._attr())

        # Create a beginning range (border).
        try:
            bound_begin, bound_value = self._tree.floor_item(begin)
        except KeyError:
            # begin is less than the smallest element: create a blank
            # range. The tree is known to be non-empty at this point.
            self._tree[begin] = (min(end, self._tree.min_key()), self._attr())
        else:
            bound_end = bound_value[0]
            if begin >= bound_end:
                # Create a blank range up to the next range (or to end).
                try:
                    new_end, _ = self._tree.succ_item(bound_begin)
                except KeyError:
                    new_end = end
                self._tree[begin] = (min(end, new_end), self._attr())
            elif bound_begin < begin and begin < bound_end:
                # Split the existing range at begin.
                new_end = bound_value[0]
                new_value = bound_value[1]
                self._tree[bound_begin] = (begin, new_value.copy())
                self._tree[begin] = (new_end, new_value.copy())
            # else: bound_begin == begin, so there is nothing to do.

        # Create an ending range (border). floor_item cannot miss here,
        # because a range starting at or before begin exists by now; a
        # KeyError would indicate a broken invariant and simply propagates.
        bound_begin, bound_value = self._tree.floor_item(end)
        bound_end = bound_value[0]
        if end > bound_end:
            # Create a blank range.
            new_begin = bound_end
            self._tree[new_begin] = (end, self._attr())
        elif bound_begin < end and end < bound_end:
            # Split the existing range at end.
            new_end = bound_value[0]
            new_value = bound_value[1]
            self._tree[bound_begin] = (end, new_value.copy())
            self._tree[end] = (new_end, new_value.copy())
        # else: end already coincides with a border, so nothing to do.

        missing_ranges = []
        prev_end = None
        for range_begin, range_value in self._tree.itemslice(begin, end):
            range_end = range_value[0]
            # We can assume that there is a range beginning with |begin|
            # and a range ending with |end| (they may be the same range).
            # Compare with None so that a range ending at 0 still counts.
            if prev_end is not None and prev_end != range_begin:
                missing_ranges.append((prev_end, range_begin))
            prev_end = range_end

        for missing_begin, missing_end in missing_ranges:
            self._tree[missing_begin] = (missing_end, self._attr())

        for range_begin, range_value in self._tree.itemslice(begin, end):
            yield range_begin, range_value[0], range_value[1]

    def __str__(self):
        return str(self._tree)
class ExclusiveRangeDict(object):
    """A class like dict whose key is a range [begin, end) of integers.

    It has an attribute for each range of integers, for example:
    [10, 20) => Attribute(0),
    [20, 40) => Attribute(1),
    [40, 50) => Attribute(2),
    ...

    An instance of this class is accessed only via iter_range(begin, end).
    The instance is accessed as follows:

    1) If the given range [begin, end) is not covered by the instance,
    the range is newly created and iterated.

    2) If the given range [begin, end) exactly covers ranges in the instance,
    the ranges are iterated.
    (See test_set() in tests/range_dict_tests.py.)

    3) If the given range [begin, end) starts at and/or ends at a mid-point of
    an existing range, the existing range is split by the given range, and
    ranges in the given range are iterated. For example, consider a case that
    [25, 45) is given to an instance of [20, 30), [30, 40), [40, 50). In this
    case, [20, 30) is split into [20, 25) and [25, 30), and [40, 50) into
    [40, 45) and [45, 50). Then, [25, 30), [30, 40), [40, 45) are iterated.
    (See test_split() in tests/range_dict_tests.py.)

    4) If the given range [begin, end) includes non-existing ranges in an
    instance, the gaps are filled with new ranges, and all ranges are
    iterated. For example, consider a case that [25, 50) is given to an
    instance of [30, 35) and [40, 45). In this case, [25, 30), [35, 40) and
    [45, 50) are created in the instance, and then [25, 30), [30, 35),
    [35, 40), [40, 45) and [45, 50) are iterated.
    (See test_fill() in tests/range_dict_tests.py.)
    """

    class RangeAttribute(object):
        """Default, stateless attribute attached to each range."""

        def __init__(self):
            pass

        def __str__(self):
            return '<RangeAttribute>'

        def __repr__(self):
            return '<RangeAttribute>'

        def copy(self):  # pylint: disable=R0201
            return ExclusiveRangeDict.RangeAttribute()

    def __init__(self, attr=RangeAttribute):
        # Maps a range's begin to the tuple (end, attribute).
        self._tree = FastRBTree()
        # Zero-argument factory that builds the attribute of a new range.
        self._attr = attr

    def iter_range(self, begin=None, end=None):
        """Yield ``(begin, end, attribute)`` for each range in [begin, end).

        Existing ranges are split at the two borders and uncovered gaps are
        filled with fresh ranges before anything is yielded.

        Args:
            begin: Inclusive lower bound; ``None`` means the smallest
                existing begin.
            end: Exclusive upper bound; ``None`` means the end of the last
                existing range.
        """
        # Test against None explicitly: 0 is a legitimate bound and must
        # not be mistaken for "not given".
        if begin is None:
            begin = self._tree.min_key()
        if end is None:
            end = self._tree.max_item()[1][0]

        # The code below assumes self._tree has at least one element.
        if self._tree.is_empty():
            self._tree[begin] = (end, self._attr())

        # Create a beginning range (border).
        try:
            bound_begin, bound_value = self._tree.floor_item(begin)
        except KeyError:
            # begin is less than the smallest element: create a blank
            # range. The tree is known to be non-empty at this point.
            self._tree[begin] = (min(end, self._tree.min_key()), self._attr())
        else:
            bound_end = bound_value[0]
            if begin >= bound_end:
                # Create a blank range up to the next range (or to end).
                try:
                    new_end, _ = self._tree.succ_item(bound_begin)
                except KeyError:
                    new_end = end
                self._tree[begin] = (min(end, new_end), self._attr())
            elif bound_begin < begin and begin < bound_end:
                # Split the existing range at begin.
                new_end = bound_value[0]
                new_value = bound_value[1]
                self._tree[bound_begin] = (begin, new_value.copy())
                self._tree[begin] = (new_end, new_value.copy())
            # else: bound_begin == begin, so there is nothing to do.

        # Create an ending range (border). floor_item cannot miss here,
        # because a range starting at or before begin exists by now; a
        # KeyError would indicate a broken invariant and simply propagates.
        bound_begin, bound_value = self._tree.floor_item(end)
        bound_end = bound_value[0]
        if end > bound_end:
            # Create a blank range.
            new_begin = bound_end
            self._tree[new_begin] = (end, self._attr())
        elif bound_begin < end and end < bound_end:
            # Split the existing range at end.
            new_end = bound_value[0]
            new_value = bound_value[1]
            self._tree[bound_begin] = (end, new_value.copy())
            self._tree[end] = (new_end, new_value.copy())
        # else: end already coincides with a border, so nothing to do.

        missing_ranges = []
        prev_end = None
        for range_begin, range_value in self._tree.itemslice(begin, end):
            range_end = range_value[0]
            # We can assume that there is a range beginning with |begin|
            # and a range ending with |end| (they may be the same range).
            # Compare with None so that a range ending at 0 still counts.
            if prev_end is not None and prev_end != range_begin:
                missing_ranges.append((prev_end, range_begin))
            prev_end = range_end

        for missing_begin, missing_end in missing_ranges:
            self._tree[missing_begin] = (missing_end, self._attr())

        for range_begin, range_value in self._tree.itemslice(begin, end):
            yield range_begin, range_value[0], range_value[1]

    def __str__(self):
        return str(self._tree)
class TDigest(object):
    """t-digest sketch whose centroids live in an ``RBTree`` keyed by mean.

    Centroids expose ``mean``, ``count`` and ``update(x, w)``.

    Args:
        delta: Compression factor used by the centroid size threshold.
        K: ``compress`` runs once the centroid count exceeds ``K / delta``.
    """

    def __init__(self, delta=0.01, K=25):
        self.C = RBTree()
        self.n = 0
        self.delta = delta
        self.K = K

    def __add__(self, other_digest):
        """Return a new digest fed with the centroids of both operands."""
        data = list(chain(self.C.values(), other_digest.C.values()))
        new_digest = TDigest(self.delta, self.K)
        if len(data) > 0:
            for c in pyudorandom.items(data):
                new_digest.update(c.mean, c.count)
        return new_digest

    def __len__(self):
        """Return the number of centroids."""
        return len(self.C)

    def __repr__(self):
        return """<T-Digest: n=%d, centroids=%d>""" % (self.n, len(self))

    def _add_centroid(self, centroid):
        """Insert ``centroid``, merging it into an existing one with the same mean."""
        if centroid.mean not in self.C:
            self.C.insert(centroid.mean, centroid)
        else:
            self.C[centroid.mean].update(centroid.mean, centroid.count)

    def _compute_centroid_quantile(self, centroid):
        """Return the quantile at the middle of ``centroid``.

        The weight summed from ``value_slice(-inf, centroid.mean)`` plus half
        of the centroid's own count is divided by ``n``.
        """
        denom = self.n
        cumulative_sum = sum(
            c_i.count
            for c_i in self.C.value_slice(-float('Inf'), centroid.mean))
        return (centroid.count / 2. + cumulative_sum) / denom

    def _update_centroid(self, centroid, x, w):
        """Re-key ``centroid`` after folding weight ``w`` at ``x`` into it."""
        # The mean is the tree key, so remove before mutating and re-insert.
        self.C.pop(centroid.mean)
        centroid.update(x, w)
        self._add_centroid(centroid)

    def _find_closest_centroids(self, x):
        """Return the centroid(s) nearest to ``x`` (two on an exact tie)."""
        try:
            ceil_key = self.C.ceiling_key(x)
        except KeyError:
            floor_key = self.C.floor_key(x)
            return [self.C[floor_key]]
        try:
            floor_key = self.C.floor_key(x)
        except KeyError:
            ceil_key = self.C.ceiling_key(x)
            return [self.C[ceil_key]]
        if abs(floor_key - x) < abs(ceil_key - x):
            return [self.C[floor_key]]
        elif abs(floor_key - x) == abs(ceil_key - x) and (ceil_key != floor_key):
            return [self.C[ceil_key], self.C[floor_key]]
        else:
            return [self.C[ceil_key]]

    def _theshold(self, q):
        """Return the largest count a centroid at quantile ``q`` may hold."""
        return 4 * self.n * self.delta * q * (1 - q)

    def update(self, x, w=1):
        """
        Update the t-digest with value x and weight w.
        """
        self.n += w
        if len(self) == 0:
            self._add_centroid(Centroid(x, w))
            return
        S = self._find_closest_centroids(x)
        while len(S) != 0 and w > 0:
            j = choice(list(range(len(S))))
            c_j = S[j]
            q = self._compute_centroid_quantile(c_j)
            # This filters out the centroids that do not satisfy the second
            # part of the definition of S. See the original paper by Dunning.
            if c_j.count + w > self._theshold(q):
                S.pop(j)
                continue
            delta_w = min(self._theshold(q) - c_j.count, w)
            self._update_centroid(c_j, x, delta_w)
            w -= delta_w
            S.pop(j)
        if w > 0:
            # Whatever weight could not be absorbed becomes a new centroid.
            self._add_centroid(Centroid(x, w))
        if len(self) > self.K / self.delta:
            self.compress()
        return

    def batch_update(self, values, w=1):
        """
        Update the t-digest with an iterable of values. This assumes all
        points have the same weight.
        """
        for x in values:
            self.update(x, w)
        self.compress()
        return

    def compress(self):
        """Rebuild the centroid tree by re-inserting centroids in pseudo-random order."""
        T = TDigest(self.delta, self.K)
        C = list(self.C.values())
        # Same guard as __add__: pyudorandom.items is never handed an empty
        # list (presumably it cannot handle one -- confirm against pyudorandom).
        if len(C) > 0:
            for c_i in pyudorandom.items(C):
                T.update(c_i.mean, c_i.count)
        self.C = T.C

    def percentile(self, p):
        """
        Computes the percentile of a specific value in [0,100].

        Raises ValueError when p is outside [0, 100].
        """
        if not (0 <= p <= 100):
            raise ValueError("p must be between 0 and 100, inclusive.")
        t = 0
        p = float(p) / 100.
        p *= self.n
        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            k = c_i.count
            if p < t + k:
                if i == 0:
                    return c_i.mean
                elif i == len(self) - 1:
                    return c_i.mean
                else:
                    delta = (self.C.succ_item(key)[1].mean -
                             self.C.prev_item(key)[1].mean) / 2.
                return c_i.mean + ((p - t) / k - 0.5) * delta
            t += k
        return self.C.max_item()[1].mean

    def quantile(self, q):
        """
        Computes the quantile of a specific value, ie. computes F(q) where F
        denotes the CDF of the distribution.
        """
        t = 0
        N = float(self.n)
        if len(self) == 1:
            # A lone centroid has no neighbour to interpolate against; asking
            # the tree for one would raise KeyError. Treat it as a step.
            return int(q >= self.C.min_key())
        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            if i == len(self) - 1:
                delta = (c_i.mean - self.C.prev_item(key)[1].mean) / 2.
            else:
                delta = (self.C.succ_item(key)[1].mean - c_i.mean) / 2.
            z = max(-1, (q - c_i.mean) / delta)
            if z < 1:
                return t / N + c_i.count / N * (z + 1) / 2
            t += c_i.count
        return 1

    def trimmed_mean(self, p1, p2):
        """
        Computes the mean of the distribution between the two percentiles p1
        and p2. This is a modified version of the algorithm presented in the
        original t-Digest paper.

        Raises ValueError unless p1 < p2.
        """
        if not (p1 < p2):
            raise ValueError("p1 must be between 0 and 100 and less than p2.")
        s = k = t = 0
        p1 /= 100.
        p2 /= 100.
        p1 *= self.n
        p2 *= self.n
        for i, key in enumerate(self.C.keys()):
            c_i = self.C[key]
            k_i = c_i.count
            if p1 < t + k_i:
                if i == 0:
                    delta = self.C.succ_item(key)[1].mean - c_i.mean
                elif i == len(self) - 1:
                    delta = c_i.mean - self.C.prev_item(key)[1].mean
                else:
                    delta = (self.C.succ_item(key)[1].mean -
                             self.C.prev_item(key)[1].mean) / 2.
                nu = ((p1 - t) / k_i - 0.5) * delta
                s += nu * k_i * c_i.mean
                k += nu * k_i
            if p2 < t + k_i:
                return s / k
            t += k_i
        return s / k