def __divide(i, lo, hi, b4, rank):
        """Search i._lst[lo:hi] for the best cut point and recurse on both halves.

        Arguments:
          lo, hi -- bounds of the slice of `i._lst` under consideration
                    (`hi` is exclusive, as used by `range` and slicing).
          b4     -- summary of the values in that slice; must offer
                    `variety()`, `n`, and `inits` (when `i.yis == "Num"`)
                    or `symList` (otherwise).
          rank   -- running counter threaded through the recursion.

        Side effects: overwrites `i.start`, `i.stop` and `i.epsilon` for this
        slice; on a successful cut records `i.finalcut` / `i.finallow`; when no
        cut is found, adds to `i.gain` and appends `b4` to `i.ranges`.

        Returns the (possibly incremented) rank.
        """
        numeric = i.yis == "Num"
        split = i.numSplit if numeric else i.symSplit

        # `left` starts empty and grows; `right` starts full and shrinks.
        left, right = split([]), split(i._lst[lo:hi])
        span = b4.inits if numeric else b4.symList
        i.stop, i.start = last(span), first(span)

        i.epsilon = b4.variety() * THE.div.cohen
        best = b4.variety()
        cut = None

        for j in range(lo, hi):
            item = i._lst[j]
            left.add(item)
            right.sub(item)

            # Both sides need at least `step` items before a cut is allowed.
            if not (left.n >= i.step and right.n >= i.step):
                continue

            now, after = i._lst[j - 1], item
            if now == after:
                continue  # never cut between two equal values

            # Every distance below must reach epsilon; the checks are chained
            # with `and` so they are evaluated left to right and stop early.
            if numeric:
                separated = (abs(right.mu - left.mu) >= i.epsilon
                             and after - i.start >= i.epsilon
                             and i.stop - now >= i.epsilon)
            else:
                # Symbols are compared through their code points.
                separated = (abs(ord(right.mode) - ord(left.mode)) >= i.epsilon
                             and ord(after) - ord(i.start) >= i.epsilon
                             and ord(i.stop) - ord(now) >= i.epsilon)
            if not separated:
                continue

            xpect = left.xpect(right)
            if xpect * THE.div.trivial < best:
                best, cut = xpect, j

        if not cut:
            # Leaf: no acceptable cut, so this slice becomes one final range.
            i.gain += b4.n * b4.variety()
            i.ranges += [b4]
            return rank

        lower, upper = i._lst[lo:cut], i._lst[cut:hi]
        i.finalcut, i.finallow = cut, lo
        # The rank coming back from the lower half is bumped by one before
        # being handed to the upper half.
        rank = i.__divide(lo, cut, split(lower), rank) + 1
        rank = i.__divide(cut, hi, split(upper), rank)
        return rank
    def __init__(i, lst, x=first, xis=Num, y=last, yis=Num):
        """Set up the splitter state for `lst` and start the recursive division.

        Arguments:
          lst -- list of rows; it is sorted IN PLACE by `row.cells[0]`, so the
                 caller's list is reordered too (note that `x` is not used as
                 the sort key).
          x   -- accessor for the independent value of an item.
          xis -- factory called as `xis(lst, key=x)` to summarise x values.
          y   -- accessor for the dependent value of an item.
          yis -- factory called as `yis(lst, key=y)` to summarise y values.
        """
        i.x, i.y = x, y
        i.xis, i.yis = xis, yis

        # Keep a reference to the caller's list, then order it in place.
        i._lst = lst
        lst.sort(key=lambda row: row.cells[0])

        i.xs = xis(lst, key=x)
        i.ys = yis(lst, key=y)

        # Accumulator that the division step adds to.
        i.gain = 0
        size = len(lst)
        # Minimum number of items allowed on either side of a split.
        i.step = int(size ** THE.div.min)
        # Largest and smallest x values of the ordered list.
        i.stop = x(last(lst))
        i.start = x(first(lst))
        # Ranges produced by the division.
        i.ranges = []
        # Bins must be separated by at least this much.
        i.epsilon = i.xs.sd() * THE.div.cohen
        i.finalcut = 0
        i.finallow = 0

        # NOTE(review): five positional arguments are passed here; confirm this
        # matches the signature of the class's __divide.
        i.__divide(1, size, i.xs, i.ys, 1)
Exemple #3
0
 def __init__(i, lst, x="first", y="last", yis="Num"):
     """Build x/y summaries from `lst`, divide on the y values, then split x.

     Arguments:
       lst -- raw input handed to `createXYList`.
       x   -- not read anywhere in this constructor.
       y   -- not read anywhere in this constructor.
       yis -- "Num" selects the `numList` of the y summary; any other value
              selects its `symList`.
     """
     i.yis = yis
     xs, ys = i.createXYList(lst, yis)
     i.x_lst, i.y_lst = xs, ys
     i.b4 = ys

     numeric = i.yis == "Num"
     if numeric:
         i._lst = ys.numList
     else:
         i._lst = ys.symList

     # Accumulator that the division step adds to.
     i.gain = 0
     # Minimum number of items allowed on either side of a split.
     i.step = int(ys.n ** THE.div.min)
     # Last and first entries of the value list being divided.
     i.stop = last(ys.numList if numeric else ys.symList)
     i.start = first(ys.numList if numeric else ys.symList)
     # Ranges produced by the division.
     i.ranges = []
     # Bins must be separated by at least this much.
     i.epsilon = ys.variety() * THE.div.cohen

     i.__divide(0, i.b4.n, i.b4, 1)
     # Normalise the accumulated gain by the number of values.
     i.gain /= len(i._lst)
     i.splitXList()
Exemple #4
0
 def get_item(self, key, left=None, right=None):
     """Look up `key` by narrowing the search here, then delegating to `self.base`.

     Arguments:
       key   -- key to search for.
       left  -- optional node to start scanning from.
       right -- optional upper bound; only tested against None in this method.

     When only `left` is supplied, the lookup is handed straight to
     `self.base.get_item` starting at `left.base`. When neither bound is
     supplied, the scan starts at `self.head`. Otherwise the nodes are walked
     via `.next` while their key is below `key`, and the base lookup is called
     with the bounds found.
     (Presumably one level of a layered, skip-list-like index; confirm.)
     """
     if right is None and left is not None:
         return self.base.get_item(key, left.base)
     if right is None:
         left, right = self.head, self.tail

     def below_key(x):
         return x.key < key

     def advance(x):
         return x.next

     # Last node whose key is still strictly smaller than `key`.
     predecessor = last(yield_while(left, below_key, advance))
     successor = predecessor.next
     if successor.key == key:
         # Exact hit on this level: continue from the matching node's base.
         return self.base.get_item(key, successor.base)
     # No hit here: let the base search between the two surrounding nodes.
     return self.base.get_item(key, predecessor.base, successor.base)
Exemple #5
0
 def __init__(i, lst, x=same, xis=Num):
     """Order `lst` by `x`, summarise it, and run the recursive division.

     Arguments:
       lst -- items to divide into ranges.
       x   -- accessor that extracts the value of interest from an item.
       xis -- factory called as `xis(items, key=x)` to summarise the values.
     """
     i.xis = xis
     items = ordered(lst, key=x)
     i._lst = items
     i.b4 = xis(items, key=x)

     # Accumulator that the division step adds to.
     i.gain = 0
     # How to get values from the items of the list.
     i.x = x
     # Minimum number of items allowed on either side of a split.
     i.step = int(len(items) ** THE.div.min)
     # Largest and smallest values of the ordered list.
     i.stop = x(last(items))
     i.start = x(first(items))
     # Ranges produced by the division.
     i.ranges = []
     # Bins must be separated by at least this much.
     i.epsilon = i.b4.sd() * THE.div.cohen

     i.__divide(1, len(items), i.b4, 1)
     # Normalise the accumulated gain by the number of items.
     i.gain /= len(i._lst)
Exemple #6
0
 def __init__(self, lst, x=first, y=last, yis=Num):
     """Order `lst` by `x`, summarise x and y, and run the division.

     Arguments:
       lst -- items to divide into ranges.
       x   -- accessor for the independent value of an item.
       y   -- accessor for the dependent value of an item.
       yis -- factory called as `yis(items, key=y)` to summarise y values;
              x values are always summarised with `Num`.
     """
     self.ctype = yis
     self.x, self.y = x, y

     rows = ordered(lst, key=x)
     self.lst = rows
     self.xtype = Num(rows, key=x)
     self.ytype = yis(rows, key=y)

     # Accumulator that the division step adds to.
     self.gain = 0
     # Minimum number of items allowed on either side of a split.
     self.step = int(len(rows) ** THE.div.min)
     # Largest and smallest x values of the ordered list.
     self.stop = x(last(rows))
     self.start = x(first(rows))
     # Ranges produced by the division.
     self.ranges = []
     # Bins must be separated by at least this much.
     self.epsilon = self.xtype.sd() * THE.div.cohen

     self.divide(1, len(rows), 1)
     # Normalise the accumulated gain by the number of items.
     self.gain /= len(self.lst)
Exemple #7
0
 def __init__(self, lst, x, y, ctypes, key_fn=same):
     """Order the rows, build one summary per column, and run the division.

     Arguments:
       lst    -- rows (each an iterable of column values).
       x      -- column index passed to `ordered(..., index=x)`.
       y      -- column index whose summary drives start/stop/epsilon.
       ctypes -- one summary factory per column, each called as
                 `factory("", column_index)`.
       key_fn -- stored on the instance; not used in this constructor.
     """
     self.ctypes = ctypes
     self.key_fn = key_fn
     self.lst = ordered(lst, key="", index=x)

     # One summary object per column, filled with every row's value.
     self.b4 = [class_type("", idx) for idx, class_type in enumerate(self.ctypes)]
     for row in self.lst:
         for idx, val in enumerate(row):
             self.b4[idx].addVal(val)

     self.x = x
     self.y = y
     # Each split needs >= 'step' items. The int() must wrap the whole power:
     # `int(len(...)) ** min` left `step` as a fractional float, unlike the
     # integer step used by the other constructors of this family.
     self.step = int(len(self.lst) ** THE.div.min)
     # Accumulator that the division step adds to.
     self.gain = 0
     y_summary = self.b4[self.y]
     self.start = first(y_summary.all_values)
     self.stop = last(y_summary.all_values)
     # Ranges produced by the division.
     self.ranges = []
     # Bins must be separated by at least this much.
     self.epsilon = y_summary.variety()
     self.epsilon *= THE.div.cohen

     low = 1
     high = self.b4[self.y].n
     self.rank, self.cut, self.best = self.divide(1, low, high, self.b4)
     # Normalise the accumulated gain by the number of y values.
     self.gain /= self.b4[self.y].n