예제 #1
0
 def divide(self, lo, hi, rank):
     """Pick the best cut inside ``self.lst[lo:hi]`` and recurse on both sides.

     A position ``j`` is a candidate cut only when all of these hold:
     both running summaries have ``.n >= self.step``; the x values at
     ``j - 1`` and ``j`` differ; the two sides' ``mu`` differ by at least
     ``self.epsilon``; ``j`` is at least ``self.epsilon`` away (in x) from
     ``self.start`` and ``self.stop``; and the expected variety, scaled by
     ``THE.div.trivial``, is below the best seen so far.

     When no cut is found the span becomes a leaf: its contribution is
     added to ``self.gain``, the x summary is tagged with ``rank`` and the
     ``(x, y)`` summary pair is appended to ``self.ranges``.

     Args:
         lo: start index of the span (inclusive).
         hi: end index of the span (exclusive).
         rank: rank to give the first leaf produced from this span.

     Returns:
         The rank given to the last leaf produced from this span.
     """
     left_x = Num(key=self.x)
     left_y = self.ctype(key=self.y)
     right_x = Num(self.lst[lo:hi], key=self.x)
     right_y = self.ctype(self.lst[lo:hi], key=self.y)
     # Snapshot the whole-span summaries before the sweep alters them.
     span_x = deepcopy(right_x)
     span_y = deepcopy(right_y)
     best = span_y.variety()
     cut = None
     for j in range(lo, hi):
         item = self.lst[j]
         # The results of these expressions are discarded, so the summary
         # types presumably update themselves in place on `+` / `-`
         # (moving `item` from the right side to the left side).
         left_x + item
         left_y + item
         right_x - item
         right_y - item
         if not (left_x.n >= self.step and right_x.n >= self.step):
             continue
         now = self.x(self.lst[j - 1])
         after = self.x(item)
         if now == after:
             continue  # never cut between two identical x values
         far_enough = (abs(right_x.mu - left_x.mu) >= self.epsilon
                       and after - self.start >= self.epsilon
                       and self.stop - now >= self.epsilon)
         if not far_enough:
             continue
         xpect = left_y.xpect(right_y)
         if xpect * THE.div.trivial < best:
             best, cut = xpect, j
     if not cut:
         # Leaf: record this span as one finished range.
         self.gain += span_x.n * span_x.variety()
         span_x.rank = rank
         self.ranges += [(span_x, span_y)]
         return rank
     # The right half starts one rank above whatever the left half ended on.
     return self.divide(cut, hi, self.divide(lo, cut, rank) + 1)
    def createXYList(i, lst, yis, y_index):
        """Summarise every non-goal column and the goal column of ``lst``.

        Rows are first sorted by ``row.cells[y_index]``. One ``Num`` is then
        built per column index in ``range(len(cells) - 1)`` other than
        ``y_index``, and the ``y_index`` values are fed into a ``Num`` (when
        ``yis == "Num"``) or a ``Sym`` (otherwise).

        Args:
            lst: rows exposing a ``cells`` sequence; must be non-empty,
                because the column count is read from the first row.
            yis: ``"Num"`` selects a ``Num`` goal summary, anything else a
                ``Sym``.
            y_index: position of the goal value inside ``row.cells``.

        Returns:
            A tuple ``(x_lst, y_lst)``: the list of per-column ``Num``
            summaries and the goal summary.
        """
        y_lst = Num() if yis == "Num" else Sym()
        rows = sorted(lst, key=lambda row: row.cells[y_index])

        x_lst = []
        # The final column is skipped (original note: "last column excluded
        # for goal").
        width = len(rows[0].cells) - 1
        for column in range(width):
            if column == y_index:
                continue
            summary = Num()
            for row in rows:
                summary.add(row.cells[column])
            x_lst.append(summary)

        for row in rows:
            y_lst.add(row.cells[y_index])

        return x_lst, y_lst
예제 #3
0
    def createXYList(i, lst, yis):
        """Summarise the x and y parts of a list of ``(x, y, ...)`` items.

        Items are sorted by their element at index 1; element 0 of each item
        is added to a ``Num`` and element 1 to a ``Num`` (when
        ``yis == "Num"``) or a ``Sym`` (otherwise).

        Args:
            lst: indexable items holding x at index 0 and y at index 1.
            yis: ``"Num"`` selects a ``Num`` y summary, anything else a
                ``Sym``.

        Returns:
            A tuple ``(x_lst, y_lst)`` of the two summaries.
        """
        pairs = sorted(lst, key=lambda pair: pair[1])
        x_lst = Num()
        y_lst = Num() if yis == "Num" else Sym()
        # The loop variable gets its own name so it does not shadow the
        # receiver parameter `i`.
        for pair in pairs:
            x_lst.add(pair[0])
            y_lst.add(pair[1])

        return x_lst, y_lst
 def tree(i, lst, y, yis, lvl=0):
     """Build a tree over ``lst`` by repeatedly splitting on the best column.

     When ``lst`` holds at least ``THE.tree.minObs * 2`` rows, every column
     in ``i.cols.indep`` is discretised with ``Div2`` and asked for its
     ``(cut, score)`` pair via ``finalcutlow()``. The column with the lowest
     score is used to split the rows and each part is expanded recursively.
     If the list is too short, or no column offers a cut, the rows are
     summarised instead.

     Args:
         lst: rows exposing ``cells``, indexed by each column's ``pos``.
         y: key function extracting the goal value from a row.
         yis: ``"Num"`` for a numeric goal; anything else is summarised
             with ``Sym``. It is also forwarded unchanged to ``Div2``.
         lvl: current depth, incremented on each recursive call.

     Returns:
         A list of ``o(lo, hi, n, txt, kids)`` nodes when a split was made,
         otherwise a ``Num`` or ``Sym`` summary of ``lst`` keyed by ``y``.
     """
     if len(lst) >= THE.tree.minObs * 2:
         # Find the best column. The running minimum must start at
         # +infinity: the original started at -10**32, so `lo1 < lo` could
         # never hold and no split was ever selected.
         lo, cut, col = float("inf"), None, None
         for col1 in i.cols.indep:
             x = lambda row: row.cells[col1.pos]
             d = Div2(lst, x=x, y=y, yis=yis)
             cut1, lo1 = d.finalcutlow()
             # NOTE(review): a falsy cut (presumably also a cut value of 0)
             # is treated as "no cut" - confirm what finalcutlow() returns.
             if cut1 and lo1 < lo:
                 cut, lo, col = cut1, lo1, col1
                 print("updated: ", cut, lo, col)
         # If a cut exists, split the data on the best column and recurse
         # on each part.
         if cut:
             x = lambda row: row.cells[col.pos]
             return [
                 o(lo=part_lo,
                   hi=part_hi,
                   n=len(kids),
                   txt=col.txt,
                   kids=i.tree(kids, y, yis, lvl + 1))
                 for part_lo, part_hi, kids in col.split(lst, x, cut)
             ]
     # Leaf: too few rows, or no useful cut was found.
     if yis == "Num":
         return Num(lst, key=y)
     else:
         return Sym(lst, key=y)
예제 #5
0
class Div2_3(Pretty):
    """
    Recursively divide a list of numbers by finding splits
    that minimize the expected value of the standard
    deviation (after the splits).

    After construction, ``ranges`` holds one ``(x summary, y summary)`` pair
    per generated range and ``gain`` holds the accumulated leaf
    contributions divided by the number of items.
    """

    def __init__(self, lst, x=first, y=last, yis=Num):
        """Sort ``lst`` by ``x`` and immediately run the recursive division.

        Args:
            lst: items to discretise.
            x: key function giving the value that ranges are built on.
            y: key function giving the value whose variety is minimised.
            yis: summary class used for the ``y`` values.
        """
        self.ctype = yis
        self.x, self.y = x, y
        self.lst = ordered(lst, key=x)
        self.xtype = Num(self.lst, key=x)
        self.ytype = self.ctype(self.lst, key=y)
        self.gain = 0  # where we will be, once done
        size = len(self.lst)
        # Each split needs at least 'step' items.
        self.step = int(size ** THE.div.min)
        self.stop = x(last(self.lst))  # top list value
        self.start = x(first(self.lst))  # bottom list value
        self.ranges = []  # the generated ranges
        # Bins must be separated by at least epsilon.
        self.epsilon = self.xtype.sd() * THE.div.cohen
        # NOTE(review): the sweep starts at index 1, so self.lst[0] is never
        # inside any span handed to divide() - confirm this is intended.
        self.divide(1, size, 1)
        self.gain /= size

    # TODO (from the original author): check the argument passing thing
    def divide(self, lo, hi, rank):
        """Pick the best cut inside ``self.lst[lo:hi]`` and recurse on both sides.

        A position ``j`` is a candidate cut only when all of these hold:
        both running summaries have ``.n >= self.step``; the x values at
        ``j - 1`` and ``j`` differ; the two sides' ``mu`` differ by at least
        ``self.epsilon``; ``j`` is at least ``self.epsilon`` away (in x)
        from ``self.start`` and ``self.stop``; and the expected variety,
        scaled by ``THE.div.trivial``, is below the best seen so far.

        When no cut is found the span becomes a leaf: its contribution is
        added to ``self.gain``, the x summary is tagged with ``rank`` and
        the ``(x, y)`` summary pair is appended to ``self.ranges``.

        Args:
            lo: start index of the span (inclusive).
            hi: end index of the span (exclusive).
            rank: rank to give the first leaf produced from this span.

        Returns:
            The rank given to the last leaf produced from this span.
        """
        left_x = Num(key=self.x)
        left_y = self.ctype(key=self.y)
        right_x = Num(self.lst[lo:hi], key=self.x)
        right_y = self.ctype(self.lst[lo:hi], key=self.y)
        # Snapshot the whole-span summaries before the sweep alters them.
        span_x = deepcopy(right_x)
        span_y = deepcopy(right_y)
        best = span_y.variety()
        cut = None
        for j in range(lo, hi):
            item = self.lst[j]
            # The results of these expressions are discarded, so the summary
            # types presumably update themselves in place on `+` / `-`
            # (moving `item` from the right side to the left side).
            left_x + item
            left_y + item
            right_x - item
            right_y - item
            if not (left_x.n >= self.step and right_x.n >= self.step):
                continue
            now = self.x(self.lst[j - 1])
            after = self.x(item)
            if now == after:
                continue  # never cut between two identical x values
            far_enough = (abs(right_x.mu - left_x.mu) >= self.epsilon
                          and after - self.start >= self.epsilon
                          and self.stop - now >= self.epsilon)
            if not far_enough:
                continue
            xpect = left_y.xpect(right_y)
            if xpect * THE.div.trivial < best:
                best, cut = xpect, j
        if not cut:
            # Leaf: record this span as one finished range.
            self.gain += span_x.n * span_x.variety()
            span_x.rank = rank
            self.ranges.append((span_x, span_y))
            return rank
        # The right half starts one rank above whatever the left half ended on.
        return self.divide(cut, hi, self.divide(lo, cut, rank) + 1)
예제 #6
0
 def __init__(self, lst, x=first, y=last, yis=Num):
     """Sort ``lst`` by ``x``, set up the split thresholds and run ``divide``.

     Args:
         lst: items to discretise.
         x: key function giving the value that ranges are built on.
         y: key function giving the value summarised by ``yis``.
         yis: summary class used for the ``y`` values.
     """
     self.ctype = yis
     self.x, self.y = x, y
     self.lst = ordered(lst, key=x)
     self.xtype = Num(self.lst, key=x)
     self.ytype = self.ctype(self.lst, key=y)
     self.gain = 0  # where we will be, once done
     size = len(self.lst)
     # Each split needs at least 'step' items.
     self.step = int(size ** THE.div.min)
     self.stop = x(last(self.lst))  # top list value
     self.start = x(first(self.lst))  # bottom list value
     self.ranges = []  # the generated ranges
     # Bins must be separated by at least epsilon.
     self.epsilon = self.xtype.sd() * THE.div.cohen
     # NOTE(review): divide is called with lo=1; if lo is a 0-based start
     # index, the first sorted item is skipped - confirm this is intended.
     self.divide(1, size, 1)
     self.gain /= size
예제 #7
0
 def numSplit(i, lst):
     """Return a fresh ``Num`` that has had every element of ``lst`` added.

     A falsy ``lst`` (for example ``None`` or an empty list) yields an
     untouched ``Num()``.
     """
     summary = Num()
     # `lst or ()` tests truthiness the same way an `if lst:` guard would.
     for value in lst or ():
         summary.add(value)
     return summary