Ejemplo n.º 1
0
def fold(rows, period, number_bins=20, flatten=True):
    """Group rows by ``row["MJD"] % period`` and yield one result per bin.

    ``np.linspace(0, period, number_bins + 1)`` supplies the bin edges. For
    each pair of consecutive edges, the rows stored under tree keys in the
    slice ``lower:upper`` are gathered; bins that gather no rows are skipped.

    Yields:
        ``(lower, flux, error)`` with ``flux, error = rebin_error(bin_rows)``
        when ``flatten`` is true, otherwise ``(lower, bin_fluxes(bin_rows))``.
    """
    by_phase = Tree()
    for record in rows:
        phase = record["MJD"] % period
        # Rows that share a phase are kept together in a single list.
        same_phase = by_phase.get(phase, [])
        same_phase.append(record)
        by_phase[phase] = same_phase

    edges = np.linspace(0, period, number_bins + 1)
    for lower, upper in zip(edges[:-1], edges[1:]):
        bin_rows = list(
            itertools.chain.from_iterable(by_phase[lower:upper].values()))
        if not bin_rows:
            continue
        if flatten:
            flux, error = rebin_error(bin_rows)
            yield (lower, flux, error)
        else:
            yield (lower, bin_fluxes(bin_rows))
Ejemplo n.º 2
0
def cbintree_build():
    """Build a FastBinaryTree from the module-level ``keys`` and discard it."""
    FastBinaryTree.from_keys(keys)
Ejemplo n.º 3
0
def cbintree_build_delete():
    """Build a FastBinaryTree from ``keys``, then delete every key from it."""
    scratch = FastBinaryTree.from_keys(keys)
    for k in keys:
        del scratch[k]
Ejemplo n.º 4
0
"""


def random_keys():
    """Return ``KEYS`` values sampled from ``range(KEYRANGE)`` via random.sample."""
    from random import sample
    population = range(KEYRANGE)
    return sample(population, KEYS)


# Load the benchmark keys: evaluate the contents of testkeys.txt when the file
# can be read, otherwise fall back to random_keys().
try:
    with open('testkeys.txt') as fp:
        # NOTE(review): eval() runs whatever expression the file contains, so
        # this is only safe while testkeys.txt is trusted; if the file holds a
        # plain literal, ast.literal_eval would be the safer choice.
        keys = eval(fp.read())
except IOError:
    # Opening or reading the file failed; generate the keys instead.
    keys = random_keys()

# One tree per implementation, both built from the same keys at import time.
# Presumably used by search/iteration benchmarks defined elsewhere — confirm.
py_searchtree = BinaryTree.from_keys(keys)
cy_searchtree = FastBinaryTree.from_keys(keys)


def bintree_build_delete():
    """Build a BinaryTree from ``keys``, then delete every key from it."""
    scratch = BinaryTree.from_keys(keys)
    for k in keys:
        del scratch[k]


def cbintree_build_delete():
    """Build a FastBinaryTree from ``keys``, then delete every key from it."""
    scratch = FastBinaryTree.from_keys(keys)
    for k in keys:
        del scratch[k]


def bintree_build():
Ejemplo n.º 5
0
class TDigest(object):
    """Set of weighted centroids kept in a tree keyed by centroid mean.

    Attributes:
        C: ``BinaryTree`` mapping each centroid's mean to the centroid.
        n: running sum of every weight passed to ``update``.
        delta: factor of the per-centroid size limit
            ``4 * n * delta * q * (1 - q)`` enforced by ``update``.
        K: ``update`` calls ``compress`` once the number of centroids
            exceeds ``K / delta`` and then doubles ``K``.
    """

    def __init__(self, delta=0.01, K=100):
        """Create an empty digest with the given ``delta`` and ``K``."""
        self.C = BinaryTree()
        self.n = 0
        self.delta = delta
        self.K = K

    def __add__(self, other_digest):
        """Return a new digest fed with the centroids of both operands.

        Each operand's centroids are copied into a list and shuffled, then
        replayed (this digest's first) into a fresh ``TDigest`` as
        ``(mean, count)`` updates.
        """
        C1 = list(self.C.values())
        C2 = list(other_digest.C.values())
        shuffle(C1)
        shuffle(C2)
        new_digest = TDigest(self.delta, self.K)
        for c in C1 + C2:
            new_digest.update((c.mean, c.count))

        return new_digest

    def __len__(self):
        """Return the number of entries stored in ``self.C``."""
        return len(self.C)

    def __repr__(self):
        """Return a summary string showing ``n`` and the centroid count."""
        return """<T-Digest: n=%d, centroids=%d>""" % (self.n, len(self))

    def _add_centroid(self, centroid):
        """Insert ``centroid`` into the tree under its mean."""
        self.C.insert(centroid.mean, centroid)

    def _compute_centroid_quantile(self, centroid):
        """Return ``(centroid.count / 2 + preceding) / n``.

        ``preceding`` is the summed ``count`` of the values returned by
        ``self.C.value_slice(-inf, centroid.mean)``.
        """
        denom = self.n
        cumulative_sum = sum(
            c_i.count
            for c_i in self.C.value_slice(-float('Inf'), centroid.mean))
        return (centroid.count / 2. + cumulative_sum) / denom

    def batch_update(self, values):
        """
        Update the t-digest with an iterable of values. This API assumes all weights are equal
        to 1.

        ``compress`` is called once after all values have been added.
        """
        w = 1
        for x in values:
            self.update((x, w))
        self.compress()

    def _get_closest_centroids(self, x):
        """Return the stored centroid(s) whose key is nearest to ``x``.

        The result is a list holding the floor centroid, the ceiling
        centroid, or both (ceiling first) when the two keys are distinct and
        equally distant from ``x``. When only one of the two lookups finds a
        key, that centroid is returned alone.

        Raises:
            KeyError: propagated from ``floor_key`` when neither a ceiling
                nor a floor key is found.
        """
        try:
            ceil_key = self.C.ceiling_key(x)
        except KeyError:
            # No key at or above x: only the floor neighbour can be used.
            floor_key = self.C.floor_key(x)
            return [self.C[floor_key]]

        try:
            floor_key = self.C.floor_key(x)
        except KeyError:
            # No key at or below x: reuse the ceiling key found above.
            return [self.C[ceil_key]]

        floor_dist = abs(floor_key - x)
        ceil_dist = abs(ceil_key - x)
        if floor_dist < ceil_dist:
            return [self.C[floor_key]]
        if floor_dist == ceil_dist and ceil_key != floor_key:
            return [self.C[ceil_key], self.C[floor_key]]
        return [self.C[ceil_key]]

    def update(self, x_w):
        """
        Update the t-digest with value x and weight w.

        Args:
            x_w: the ``(x, w)`` pair, passed as one positional argument
                exactly as before (``digest.update((x, w))``). The pair is
                unpacked in the body because tuple parameters in a ``def``
                signature are a syntax error on Python 3.
        """
        x, w = x_w
        self.n += w

        if len(self) == 0:
            self._add_centroid(Centroid(x, w))
            return

        S = self._get_closest_centroids(x)

        while S and w > 0:
            # Pick one remaining candidate at random; each is tried once.
            j = choice(range(len(S)))
            c_j = S.pop(j)

            q = self._compute_centroid_quantile(c_j)
            limit = 4 * self.n * self.delta * q * (1 - q)
            if c_j.count + w > limit:
                # Adding w would push this centroid past its size limit.
                continue

            delta_w = min(limit - c_j.count, w)
            self._update_centroid(c_j, x, delta_w)
            w -= delta_w

        if w > 0:
            # Weight that no candidate accepted becomes a new centroid.
            self._add_centroid(Centroid(x, w))

        if len(self) > self.K / self.delta:
            self.compress()
            self.K *= 2
Ejemplo n.º 6
0
 def __init__(self, delta=0.01, K=100):
     """Store ``delta`` and ``K``, zero ``n`` and create an empty tree in ``C``."""
     self.delta, self.K = delta, K
     self.n = 0
     self.C = BinaryTree()
Ejemplo n.º 7
0
 def __init__(self, delta=0.01, K=100):
     """Store ``delta`` and ``K``, zero ``n`` and create an empty tree in ``C``."""
     self.delta, self.K = delta, K
     self.n = 0
     self.C = BinaryTree()
Ejemplo n.º 8
0
class TDigest(object):
    """Set of weighted centroids kept in a tree keyed by centroid mean.

    Attributes:
        C: ``BinaryTree`` mapping each centroid's mean to the centroid.
        n: running sum of every weight passed to ``update``.
        delta: factor of the per-centroid size limit
            ``4 * n * delta * q * (1 - q)`` enforced by ``update``.
        K: ``update`` calls ``compress`` once the number of centroids
            exceeds ``K / delta`` and then doubles ``K``.
    """

    def __init__(self, delta=0.01, K=100):
        """Create an empty digest with the given ``delta`` and ``K``."""
        self.C = BinaryTree()
        self.n = 0
        self.delta = delta
        self.K = K

    def __add__(self, other_digest):
        """Return a new digest fed with the centroids of both operands.

        Each operand's centroids are copied into a list and shuffled, then
        replayed (this digest's first) into a fresh ``TDigest`` as
        ``(mean, count)`` updates.
        """
        C1 = list(self.C.values())
        C2 = list(other_digest.C.values())
        shuffle(C1)
        shuffle(C2)
        new_digest = TDigest(self.delta, self.K)
        for c in C1 + C2:
            new_digest.update((c.mean, c.count))

        return new_digest

    def __len__(self):
        """Return the number of entries stored in ``self.C``."""
        return len(self.C)

    def __repr__(self):
        """Return a summary string showing ``n`` and the centroid count."""
        return """<T-Digest: n=%d, centroids=%d>""" % (self.n, len(self))

    def _add_centroid(self, centroid):
        """Insert ``centroid`` into the tree under its mean."""
        self.C.insert(centroid.mean, centroid)

    def _compute_centroid_quantile(self, centroid):
        """Return ``(centroid.count / 2 + preceding) / n``.

        ``preceding`` is the summed ``count`` of the values returned by
        ``self.C.value_slice(-inf, centroid.mean)``.
        """
        denom = self.n
        cumulative_sum = sum(
            c_i.count
            for c_i in self.C.value_slice(-float('Inf'), centroid.mean))
        return (centroid.count / 2. + cumulative_sum) / denom

    def batch_update(self, values):
        """
        Update the t-digest with an iterable of values. This API assumes all weights are equal
        to 1.

        ``compress`` is called once after all values have been added.
        """
        w = 1
        for x in values:
            self.update((x, w))
        self.compress()

    def _get_closest_centroids(self, x):
        """Return the stored centroid(s) whose key is nearest to ``x``.

        The result is a list holding the floor centroid, the ceiling
        centroid, or both (ceiling first) when the two keys are distinct and
        equally distant from ``x``. When only one of the two lookups finds a
        key, that centroid is returned alone.

        Raises:
            KeyError: propagated from ``floor_key`` when neither a ceiling
                nor a floor key is found.
        """
        try:
            ceil_key = self.C.ceiling_key(x)
        except KeyError:
            # No key at or above x: only the floor neighbour can be used.
            floor_key = self.C.floor_key(x)
            return [self.C[floor_key]]

        try:
            floor_key = self.C.floor_key(x)
        except KeyError:
            # No key at or below x: reuse the ceiling key found above.
            return [self.C[ceil_key]]

        floor_dist = abs(floor_key - x)
        ceil_dist = abs(ceil_key - x)
        if floor_dist < ceil_dist:
            return [self.C[floor_key]]
        if floor_dist == ceil_dist and ceil_key != floor_key:
            return [self.C[ceil_key], self.C[floor_key]]
        return [self.C[ceil_key]]

    def update(self, x_w):
        """
        Update the t-digest with value x and weight w.

        Args:
            x_w: the ``(x, w)`` pair, passed as one positional argument
                exactly as before (``digest.update((x, w))``). The pair is
                unpacked in the body because tuple parameters in a ``def``
                signature are a syntax error on Python 3.
        """
        x, w = x_w
        self.n += w

        if len(self) == 0:
            self._add_centroid(Centroid(x, w))
            return

        S = self._get_closest_centroids(x)

        while S and w > 0:
            # Pick one remaining candidate at random; each is tried once.
            j = choice(range(len(S)))
            c_j = S.pop(j)

            q = self._compute_centroid_quantile(c_j)
            limit = 4 * self.n * self.delta * q * (1 - q)
            if c_j.count + w > limit:
                # Adding w would push this centroid past its size limit.
                continue

            delta_w = min(limit - c_j.count, w)
            self._update_centroid(c_j, x, delta_w)
            w -= delta_w

        if w > 0:
            # Weight that no candidate accepted becomes a new centroid.
            self._add_centroid(Centroid(x, w))

        if len(self) > self.K / self.delta:
            self.compress()
            self.K *= 2
Ejemplo n.º 9
0
def cbintree_build():
    """Build a FastBinaryTree from the module-level ``keys`` and discard it."""
    FastBinaryTree.from_keys(keys)
Ejemplo n.º 10
0
def cbintree_build_delete():
    """Build a FastBinaryTree from ``keys``, then delete every key from it."""
    scratch = FastBinaryTree.from_keys(keys)
    for k in keys:
        del scratch[k]
Ejemplo n.º 11
0
# Source snippet that imports the FastBinaryTree benchmark callables from
# __main__ (presumably passed as the ``setup`` argument of timeit — confirm).
setup_FastBinaryTree = """
from __main__ import cbintree_build_delete, cbintree_build, cbintree_search, itercbintree
"""


def random_keys():
    """Return ``KEYS`` values sampled from ``range(KEYRANGE)`` via random.sample."""
    from random import sample
    population = range(KEYRANGE)
    return sample(population, KEYS)
# Load the benchmark keys: evaluate the contents of testkeys.txt when the file
# can be read, otherwise fall back to random_keys().
try:
    with open('testkeys.txt') as fp:
        # NOTE(review): eval() runs whatever expression the file contains, so
        # this is only safe while testkeys.txt is trusted; if the file holds a
        # plain literal, ast.literal_eval would be the safer choice.
        keys = eval(fp.read())
except IOError:
    # Opening or reading the file failed; generate the keys instead.
    keys = random_keys()

# One tree per implementation, both built from the same keys at import time.
# Presumably used by search/iteration benchmarks defined elsewhere — confirm.
py_searchtree = BinaryTree.from_keys(keys)
cy_searchtree = FastBinaryTree.from_keys(keys)


def bintree_build_delete():
    """Build a BinaryTree from ``keys``, then delete every key from it."""
    scratch = BinaryTree.from_keys(keys)
    for k in keys:
        del scratch[k]


def cbintree_build_delete():
    """Build a FastBinaryTree from ``keys``, then delete every key from it."""
    scratch = FastBinaryTree.from_keys(keys)
    for k in keys:
        del scratch[k]


def bintree_build():