def fold(rows, period, number_bins=20, flatten=True):
    """Phase-fold *rows* on *period* and yield per-bin aggregates.

    Each row's "MJD" value is reduced modulo *period*, rows are grouped by
    that phase in a sorted tree, and the [0, period) range is split into
    *number_bins* equal bins.  For every non-empty bin:

    - flatten=True  -> yields (bin_start, flux, error) via ``rebin_error``
    - flatten=False -> yields (bin_start, bin_fluxes(rows_in_bin))

    Empty bins produce nothing.
    """
    phase_tree = Tree()
    for entry in rows:
        phase = entry["MJD"] % period
        bucket = phase_tree.get(phase, [])
        bucket.append(entry)
        phase_tree[phase] = bucket

    # number_bins + 1 edges delimit number_bins half-open [lower, upper) bins;
    # the tree slice below relies on the keys being sorted.
    edges = np.linspace(0, period, number_bins + 1)
    for lower, upper in zip(edges[:-1], edges[1:]):
        members = list(
            itertools.chain.from_iterable(phase_tree[lower:upper].values()))
        if not members:
            continue
        if flatten:
            flux, error = rebin_error(members)
            yield (lower, flux, error)
        else:
            yield (lower, bin_fluxes(members))
def cbintree_build():
    """Benchmark body: construct a FastBinaryTree from the shared key list."""
    built = FastBinaryTree.from_keys(keys)
def cbintree_build_delete():
    """Benchmark body: build a FastBinaryTree, then delete every key again."""
    t = FastBinaryTree.from_keys(keys)
    for k in keys:
        del t[k]
""" def random_keys(): import random return random.sample(range(KEYRANGE), KEYS) try: with open('testkeys.txt') as fp: keys = eval(fp.read()) except IOError: keys = random_keys() py_searchtree = BinaryTree.from_keys(keys) cy_searchtree = FastBinaryTree.from_keys(keys) def bintree_build_delete(): tree = BinaryTree.from_keys(keys) for key in keys: del tree[key] def cbintree_build_delete(): tree = FastBinaryTree.from_keys(keys) for key in keys: del tree[key] def bintree_build():
class TDigest(object):
    """Streaming t-digest sketch for approximate quantile estimation.

    Centroids are kept in a sorted tree keyed by their mean.  ``delta``
    controls accuracy (smaller = more centroids, more accurate) and ``K``
    scales the compression trigger.
    """

    def __init__(self, delta=0.01, K=100):
        self.C = BinaryTree()  # centroids, keyed by centroid mean
        self.n = 0             # total weight observed so far
        self.delta = delta     # accuracy parameter
        self.K = K             # compression trigger factor

    def __add__(self, other_digest):
        """Merge two digests into a new one by re-inserting all centroids.

        Centroids from both digests are shuffled before insertion so the
        merge order does not bias the resulting digest.
        """
        C1 = list(self.C.values())
        C2 = list(other_digest.C.values())
        shuffle(C1)
        shuffle(C2)
        data = C1 + C2
        new_digest = TDigest(self.delta, self.K)
        for c in data:
            new_digest.update((c.mean, c.count))
        return new_digest

    def __len__(self):
        # Number of centroids currently held.
        return len(self.C)

    def __repr__(self):
        return """<T-Digest: n=%d, centroids=%d>""" % (self.n, len(self))

    def _add_centroid(self, centroid):
        """Insert *centroid* into the tree, keyed by its mean."""
        self.C.insert(centroid.mean, centroid)
        return

    def _compute_centroid_quantile(self, centroid):
        """Approximate quantile of *centroid*: weight strictly below its mean
        plus half its own weight, normalised by the total weight."""
        denom = self.n
        cumulative_sum = sum(
            c_i.count for c_i in self.C.value_slice(-float('Inf'), centroid.mean))
        return (centroid.count / 2. + cumulative_sum) / denom

    def batch_update(self, values):
        """
        Update the t-digest with an iterable of values. This API assumes all weights are equal to 1.
        """
        w = 1
        for x in values:
            self.update((x, w))
        self.compress()
        return

    def _get_closest_centroids(self, x):
        """Return the centroid(s) whose mean is nearest to *x*.

        Returns both neighbours on an exact distance tie; falls back to the
        single available neighbour when *x* lies beyond either end of the tree.
        """
        try:
            ceil_key = self.C.ceiling_key(x)
        except KeyError:
            # x is above every key: only a floor neighbour exists.
            floor_key = self.C.floor_key(x)
            return [self.C[floor_key]]
        try:
            floor_key = self.C.floor_key(x)
        except KeyError:
            # x is below every key: only a ceiling neighbour exists.
            ceil_key = self.C.ceiling_key(x)
            return [self.C[ceil_key]]
        if abs(floor_key - x) < abs(ceil_key - x):
            return [self.C[floor_key]]
        elif abs(floor_key - x) == abs(ceil_key - x) and (ceil_key != floor_key):
            return [self.C[ceil_key], self.C[floor_key]]
        else:
            return [self.C[ceil_key]]

    def update(self, xw):
        """
        Update the t-digest with value x and weight w, passed as the single
        tuple argument ``xw = (x, w)``.

        NOTE: the original signature used Python 2 tuple-parameter unpacking
        (``def update(self, (x, w))``), which is a SyntaxError on Python 3
        (PEP 3113).  Callers already pass one tuple positionally, so unpacking
        inside the body is fully backward-compatible.
        """
        x, w = xw
        self.n += w
        if len(self) == 0:
            # First value ever seen: it becomes its own centroid.
            self._add_centroid(Centroid(x, w))
            return

        S = self._get_closest_centroids(x)

        while len(S) != 0 and w > 0:
            # Candidates are consumed in random order to avoid bias.
            j = choice(range(len(S)))
            c_j = S[j]

            q = self._compute_centroid_quantile(c_j)

            # Size bound from the t-digest paper: centroids near the median
            # (q ~ 0.5) may grow larger than those in the tails.
            if c_j.count + w > 4 * self.n * self.delta * q * (1 - q):
                S.pop(j)
                continue

            delta_w = min(4 * self.n * self.delta * q * (1 - q) - c_j.count, w)
            self._update_centroid(c_j, x, delta_w)
            w -= delta_w
            S.pop(j)

        if w > 0:
            # Leftover weight that no existing centroid could absorb.
            self._add_centroid(Centroid(x, w))

        if len(self) > self.K / self.delta:
            # Too many centroids: compress, and raise the threshold.
            self.compress()
            self.K *= 2
        return
def __init__(self, delta=0.01, K=100):
    """Initialise an empty digest.

    delta: accuracy parameter (smaller = finer quantile resolution).
    K: size factor controlling when compression is triggered.
    """
    self.delta = delta
    self.K = K
    self.n = 0                # total weight observed so far
    self.C = BinaryTree()     # centroids, keyed by mean
setup_FastBinaryTree = """ from __main__ import cbintree_build_delete, cbintree_build, cbintree_search, itercbintree """ def random_keys(): import random return random.sample(range(KEYRANGE), KEYS) try: with open('testkeys.txt') as fp: keys = eval(fp.read()) except IOError: keys = random_keys() py_searchtree = BinaryTree.from_keys(keys) cy_searchtree = FastBinaryTree.from_keys(keys) def bintree_build_delete(): tree = BinaryTree.from_keys(keys) for key in keys: del tree[key] def cbintree_build_delete(): tree = FastBinaryTree.from_keys(keys) for key in keys: del tree[key] def bintree_build():