def __divide(i, lo, hi, b4, rank):
    """Find the best cut inside i._lst[lo:hi], then recurse on both halves.

    Parameters:
      lo, hi: half-open index bounds into i._lst handled by this call.
      b4:     summary of i._lst[lo:hi]; must provide variety() and n, plus
              inits (when i.yis == "Num") or symList (otherwise).
      rank:   running counter threaded through the recursion.

    Returns the updated rank.

    Side effects: i.start, i.stop and i.epsilon are overwritten from b4 on
    every call. When a cut is taken, i.finalcut and i.finallow are set to
    that cut and to lo. When no cut is taken, b4 is appended to i.ranges and
    b4.n * b4.variety() is added to i.gain.
    """
    numeric = i.yis == "Num"
    make = i.numSplit if numeric else i.symSplit
    left = make([])  # receives items as j advances
    right = make(i._lst[lo:hi])  # gives items up as j advances
    seen = b4.inits if numeric else b4.symList
    i.stop = last(seen)
    i.start = first(seen)
    i.epsilon = b4.variety() * THE.div.cohen

    best = b4.variety()
    cut = None
    for j in range(lo, hi):
        item = i._lst[j]
        left.add(item)
        right.sub(item)
        # Both sides need at least i.step items before a cut is considered.
        if left.n < i.step or right.n < i.step:
            continue
        # NOTE(review): at j == lo this reads i._lst[lo - 1], which lies
        # outside [lo, hi) and wraps to the end of the list when lo == 0.
        now = i._lst[j - 1]
        after = item
        if now == after:
            continue  # never cut between two equal values
        if numeric:
            apart = (
                abs(right.mu - left.mu) >= i.epsilon
                and after - i.start >= i.epsilon
                and i.stop - now >= i.epsilon
            )
        else:
            # ord() is applied to the values and modes, so they are
            # presumably single characters -- TODO confirm.
            apart = (
                abs(ord(right.mode) - ord(left.mode)) >= i.epsilon
                and ord(after) - ord(i.start) >= i.epsilon
                and ord(i.stop) - ord(now) >= i.epsilon
            )
        if not apart:
            continue
        xpect = left.xpect(right)
        if xpect * THE.div.trivial < best:
            best, cut = xpect, j

    # NOTE(review): a cut at index 0 is falsy and is treated like "no cut".
    if not cut:
        i.gain += b4.n * b4.variety()
        i.ranges += [b4]
        return rank

    ls, rs = i._lst[lo:cut], i._lst[cut:hi]
    i.finalcut = cut
    i.finallow = lo
    rank = i.__divide(lo, cut, make(ls), rank) + 1
    rank = i.__divide(cut, hi, make(rs), rank)
    return rank
def __init__(i, lst, x=first, xis=Num, y=last, yis=Num):
    """Sort the rows, summarise their x and y values, then start dividing.

    Parameters:
      lst:    rows to discretize; each row must expose `cells`.
      x, y:   accessors handed to the summaries as `key`; x is also applied
              to the first and last row to obtain i.start and i.stop.
      xis, yis: callables building the x and y summaries from
              (rows, key=...).
    """
    i.x = x
    i.xis = xis
    i.y = y
    i.yis = yis
    # NOTE(review): `lst` itself is sorted in place, so the caller's list is
    # reordered, and the sort key is always cells[0] regardless of `x`.
    i._lst = lst
    lst.sort(key=lambda row: row.cells[0])
    i.xs = xis(lst, key=x)
    i.ys = yis(lst, key=y)
    i.gain = 0  # accumulated while dividing
    size = len(lst)
    i.step = int(size ** THE.div.min)  # every split needs >= step items
    i.stop = x(last(lst))  # x value of the last row
    i.start = x(first(lst))  # x value of the first row
    i.ranges = []  # the generated ranges
    i.epsilon = i.xs.sd() * THE.div.cohen  # bins must differ by >= epsilon
    i.finalcut = 0
    i.finallow = 0
    i.__divide(1, size, i.xs, i.ys, 1)
def __init__(i, lst, x="first", y="last", yis="Num"):
    """Build x/y summaries from `lst`, divide on the y values, split the x list.

    Parameters:
      lst: raw data passed to i.createXYList together with `yis`.
      x, y: accepted but not used by this constructor.
      yis: "Num" selects the y summary's numList; any other value selects
           its symList.
    """
    i.yis = yis
    i.x_lst, i.y_lst = i.createXYList(lst, yis)
    summary = i.y_lst
    i.b4 = summary
    # The list being divided; its ends also give the start/stop bounds.
    values = summary.numList if i.yis == "Num" else summary.symList
    i._lst = values
    i.gain = 0  # accumulated while dividing, normalised below
    i.step = int(summary.n ** THE.div.min)  # every split needs >= step items
    i.stop = last(values)  # last value of the list
    i.start = first(values)  # first value of the list
    i.ranges = []  # the generated ranges
    i.epsilon = summary.variety() * THE.div.cohen  # bins must differ by >= epsilon
    i.__divide(0, i.b4.n, i.b4, 1)
    i.gain /= len(i._lst)
    i.splitXList()
def get_item(self, key, left=None, right=None):
    """Look up `key`, delegating the final lookup to self.base.

    With only `left` supplied, the call is forwarded to
    self.base.get_item(key, left.base). With neither bound supplied, the
    walk starts from self.head. The nodes are then followed through `.next`
    while their key is smaller than `key` (as driven by yield_while; its
    exact semantics are defined elsewhere -- TODO confirm), and the base
    lookup is narrowed to the last such node and its successor, or to the
    successor alone when its key equals `key`.
    """
    if right is None:
        if left is not None:
            return self.base.get_item(key, left.base)
        # NOTE(review): `right` is assigned here but never read afterwards.
        left, right = self.head, self.tail

    def is_before(node):
        return node.key < key

    def advance(node):
        return node.next

    floor = last(yield_while(left, is_before, advance))
    successor = floor.next
    if successor.key == key:
        return self.base.get_item(key, successor.base)
    return self.base.get_item(key, floor.base, successor.base)
def __init__(i, lst, x=same, xis=Num):
    """Order `lst` by `x`, summarise it, and divide it into ranges.

    Parameters:
      lst: items to discretize.
      x:   accessor extracting the value of interest from an item.
      xis: callable building the summary from (items, key=x); the summary
           must provide sd().
    """
    i.xis = xis
    items = ordered(lst, key=x)
    i._lst = items
    i.b4 = xis(items, key=x)
    i.gain = 0  # accumulated while dividing, normalised below
    i.x = x  # how to get values from the items
    size = len(items)
    i.step = int(size ** THE.div.min)  # every split needs >= step items
    i.stop = x(last(items))  # value of the last item
    i.start = x(first(items))  # value of the first item
    i.ranges = []  # the generated ranges
    i.epsilon = i.b4.sd() * THE.div.cohen  # bins must differ by >= epsilon
    i.__divide(1, size, i.b4, 1)
    i.gain /= size
def __init__(self, lst, x=first, y=last, yis=Num):
    """Order `lst` by `x`, summarise its x and y values, and divide it.

    Parameters:
      lst: rows to discretize.
      x:   accessor for the independent value of a row.
      y:   accessor for the dependent value of a row.
      yis: callable building the y summary from (rows, key=y).
    """
    self.ctype = yis
    self.x = x
    self.y = y
    rows = ordered(lst, key=x)
    self.lst = rows
    self.xtype = Num(rows, key=x)
    self.ytype = yis(rows, key=y)
    self.gain = 0  # accumulated while dividing, normalised below
    size = len(rows)
    self.step = int(size ** THE.div.min)  # every split needs >= step items
    self.stop = x(last(rows))  # x value of the last row
    self.start = x(first(rows))  # x value of the first row
    self.ranges = []  # the generated ranges
    self.epsilon = self.xtype.sd() * THE.div.cohen  # bins must differ by >= epsilon
    self.divide(1, size, 1)
    self.gain /= size
def __init__(self, lst, x, y, ctypes, key_fn=same):
    """Order the rows, summarise every column, and divide on column `y`.

    Parameters:
      lst:    rows (iterables of cell values) to discretize.
      x:      column index passed to ordered() as `index`.
      y:      index into the column summaries of the column that drives
              start, stop, epsilon and the division bounds.
      ctypes: one summary class per column; each is called as
              class_type("", column_index) and must provide addVal().
      key_fn: stored on the instance; not otherwise used here.
    """
    self.ctypes = ctypes
    self.key_fn = key_fn
    self.lst = ordered(lst, key="", index=x)
    # One summary object per column, filled cell by cell.
    self.b4 = [class_type("", idx) for idx, class_type in enumerate(self.ctypes)]
    for row in self.lst:
        for idx, val in enumerate(row):
            self.b4[idx].addVal(val)
    self.x = x
    self.y = y
    # Truncate the *result* of the power. The previous form,
    # int(len(...)) ** THE.div.min, only wrapped the length and left `step`
    # as an untruncated float.
    self.step = int(len(self.lst) ** THE.div.min)  # every split needs >= step items
    self.gain = 0  # accumulated while dividing, normalised below
    target = self.b4[self.y]
    self.start = first(target.all_values)
    self.stop = last(target.all_values)
    self.ranges = []  # the generated ranges
    self.epsilon = target.variety()
    self.epsilon *= THE.div.cohen  # bins must differ by >= epsilon
    low = 1
    high = target.n
    self.rank, self.cut, self.best = self.divide(1, low, high, self.b4)
    self.gain /= self.b4[self.y].n