コード例 #1
0
def xgblogit(label,
             factors,
             trainselector=None,
             mu=0.5,
             eta=0.1,
             lambda_=1.0,
             gamma=0.0,
             maxdepth=2,
             nrounds=2,
             minh=1.0,
             slicelen=10000):
    """Run ``nrounds`` boosting rounds and return the grown trees and scores.

    Every observation starts from the constant raw score ``logitraw(mu)``.
    In each round the gradient/hessian buffers are refreshed from the current
    scores (through ``get_gh_sel`` when ``trainselector`` is given, otherwise
    through ``get_gh``), wrapped as covariates and handed to ``growtree``
    together with the tuning parameters.

    Returns:
        ``(trees, fm)`` where ``trees`` collects the first element returned
        by ``growtree`` for each round and ``fm`` is the second element
        returned by the last round. ``get_pred(fm)`` is called before
        returning; its result is discarded, so it presumably transforms
        ``fm`` in place (confirm against ``get_pred``).
    """
    nobs = len(label)
    f0 = np.empty(nobs, dtype=np.float32)
    f0.fill(logitraw(mu))
    label = label.to_array()
    # Scratch buffers that are overwritten in every round.
    g = np.zeros(nobs, dtype=np.float32)
    h = np.zeros(nobs, dtype=np.float32)

    def boost_round(state, _round):
        # The round index is not needed; it only drives the iteration count.
        scores, grown = state
        if trainselector is None:
            get_gh(scores, label, g, h)
        else:
            get_gh_sel(trainselector, scores, label, g, h)
        tree, predraw = growtree(factors,
                                 Covariate.from_array(g),
                                 Covariate.from_array(h),
                                 scores, eta, maxdepth,
                                 lambda_, gamma, minh, slicelen)
        grown.append(tree)
        return (predraw, grown)

    rounds = Seq.from_gen((i for i in range(nrounds)))
    fm, trees = Seq.reduce(boost_round, (f0, []), rounds)
    get_pred(fm)
    return trees, fm
コード例 #2
0
ファイル: split.py プロジェクト: pm390/cortado
def splitnodeidsslice(nodeids, factors, issplitnode, nodemap, leftpartitions, factorindex,
                      start, length, slicelength):
    """Apply ``f_splitnode`` to every paired (node-id slice, factor slice).

    Does nothing when ``factors`` is empty. Otherwise the node ids and the
    factors are sliced over the same ``start``/``length``/``slicelength``
    window and each pair of slices is passed to ``f_splitnode`` along with
    ``issplitnode``, ``leftpartitions``, ``factorindex`` and ``nodemap``.
    Returns ``None``.
    """
    if len(factors) == 0:
        return

    factorslices = NFactorSlicer(factors)(start, length, slicelength)
    nodeslices = VectorSlicer(nodeids)(start, length, slicelength)

    def split_pair(pair):
        # pair[0] is the node-id slice, pair[1] the matching factor slice.
        return f_splitnode(pair[0], pair[1], issplitnode, leftpartitions,
                           factorindex, nodemap)

    Seq.foreach(split_pair, Seq.zip(nodeslices, factorslices))
コード例 #3
0
ファイル: split.py プロジェクト: pm390/cortado
def get_hist_slice(gsum0, hsum0, nodeids, nodecansplit, factor, gcovariate, hcovariate,
                   start, length, slicelen):
    """Fold ``f_hist`` over aligned slices of node ids, factor and g/h covariates.

    All four inputs are sliced over the same ``start``/``length``/``slicelen``
    window and zipped together; the reduction starts from
    ``(gsum0, hsum0, nodecansplit)`` and its final accumulator is returned.
    """
    window = (start, length, slicelen)

    nodeslices = VectorSlicer(nodeids)(*window)
    factorslices = factor.slicer(*window)
    gslices = gcovariate.slicer(*window)
    hslices = hcovariate.slicer(*window)

    initial = (gsum0, hsum0, nodecansplit)
    return Seq.reduce(f_hist, initial,
                      Seq.zip(nodeslices, factorslices, gslices, hslices))
コード例 #4
0
    def __call__(self, start, length, slicelen):
        """Return a sequence of consecutive slices of ``self.vector``.

        ``start`` must be a valid index into the vector. ``length`` is clamped
        to the number of elements available from ``start`` and ``slicelen`` is
        clamped to the resulting length. Slice bounds are produced by
        ``next_slice_indices`` and used as ``(from, to)`` indices into the
        vector.
        """
        total = len(self.vector)
        assert 0 <= start < total
        length = min(length, total - start)
        slicelen = min(length, slicelen)

        def window(bounds):
            lo, hi = bounds[0], bounds[1]
            return self.vector[lo:hi]

        bounds_seq = Seq.from_next((start, length, slicelen), next_slice_indices)
        return Seq.map(window, bounds_seq)
コード例 #5
0
ファイル: cutcovfactor.py プロジェクト: pm390/cortado
 def slicer(start, length, slicelen):
     """Map every slice of ``covariate`` through the matching cut function.

     ``length`` is clamped to what remains after ``start`` and ``slicelen``
     to that length. Each covariate slice is passed to ``g_rightclosed`` or
     ``g_leftclosed`` (chosen by ``rightclosed``) together with ``cuts`` and
     one output buffer of ``slicelen`` elements that is reused for every
     slice.
     """
     length = min(len(self) - start, length)
     slicelen = min(length, slicelen)
     buf = np.empty(slicelen, dtype=dtype)

     if rightclosed:
         def cut(s):
             return g_rightclosed(s, buf, cuts)
     else:
         def cut(s):
             return g_leftclosed(s, buf, cuts, levelcount - 1)

     return Seq.map(cut, covariate.slicer(start, length, slicelen))
コード例 #6
0
    def slicer(self):
        """Return a ``VectorSlicer`` over a fully materialised copy of the factor.

        On first use the wrapped factor is read slice by slice (``SLICELEN``
        elements at a time) into one array that is stored in ``self.cache``;
        later calls reuse that array.
        """
        if self.cache is None:
            # Fill a local buffer and publish it only when it is complete.
            # np.empty returns uninitialised memory, so storing it in
            # self.cache up front would leave garbage cached for good if
            # reading the factor raised part-way through.
            cache = np.empty(len(self), dtype=self.factor.slicer.dtype)
            slices = self.factor.slicer(0, len(self.factor), SLICELEN)

            def f(offset, chunk):
                # Copy the chunk at the running offset and advance it.
                n = len(chunk)
                cache[offset:(offset + n)] = chunk
                return offset + n

            Seq.reduce(f, 0, slices)
            self.cache = cache
        return VectorSlicer(self.cache)
コード例 #7
0
ファイル: split.py プロジェクト: pm390/cortado
def growtree(factors, gcovariate, hcovariate, fm, eta, maxdepth, lambda_, gamma, minh, slicelen):
    """Grow up to ``maxdepth`` layers of a tree and apply ``predict`` to ``fm``.

    Node ids are stored per observation in a ``uint8`` array when
    ``2 ** maxdepth`` fits into ``uint8`` and in a ``uint16`` array otherwise.
    Growth starts from a single ``LeafNode`` whose partitions mark every level
    of every factor as ``True`` and whose loss is the largest ``float32``.

    Returns:
        ``(layers, fm)`` where ``layers`` is the list of items produced by
        iterating ``nextlayer`` at most ``maxdepth`` times, and ``fm`` is the
        array passed in, after ``predict`` has been called with it.
    """
    length = len(gcovariate)
    maxnodecount = 2 ** maxdepth
    if maxnodecount <= np.iinfo(np.uint8).max:
        nodeid_dtype = np.uint8
    else:
        nodeid_dtype = np.uint16
    nodeids = np.zeros(length, dtype=nodeid_dtype)

    loss0 = np.finfo(np.float32).max
    partitions0 = {f: np.full(len(f.levels), True) for f in factors}
    root = LeafNode((0.0, 0.0), True, partitions0, loss0)

    state0 = TreeGrowState(nodeids, [root], factors, gcovariate, hcovariate,
                           gamma, lambda_, minh, slicelen)
    layer_seq = Seq.take(Seq.from_next(state0, nextlayer), maxdepth)
    layers = Seq.tolist(layer_seq)
    predict(state0.nodes, nodeids, fm, eta, lambda_)
    return layers, fm
コード例 #8
0
ファイル: fileslicer.py プロジェクト: pm390/cortado
 def __call__(self, start, length, slicelen):
     """Open ``self.path`` and return a sequence driven by ``next_data_chunk``.

     The file is opened in binary mode and positioned at element ``start``
     (``start`` times the item size of ``self.dtype``). The sequence state
     holds a reusable buffer of ``min(length, slicelen)`` elements, the open
     stream and the remaining window.

     NOTE(review): the stream is not closed here; presumably
     ``next_data_chunk`` or the consumer is responsible for that - confirm.
     """
     assert start >= 0
     slicelen = min(length, slicelen)
     stream = open(self.path, "rb")
     chunk_buf = np.empty(slicelen, dtype=self.dtype)
     byte_offset = chunk_buf.dtype.itemsize * start
     stream.seek(byte_offset)
     return Seq.from_next((chunk_buf, stream, start, length, slicelen),
                          next_data_chunk)
コード例 #9
0
    def __repr__(self):
        """Describe the variate by name, length and its first values.

        At most ``HEADLENGTH`` values are rendered, each followed by a space.
        """
        shown = min(HEADLENGTH, len(self))
        slices = self.slicer(0, shown, HEADLENGTH)

        def append_chunk(text, chunk):
            rendered = ' '.join(map(str, chunk))
            return text + rendered + " "

        datahead = Seq.reduce(append_chunk, "", slices)
        template = "BoolVariate {var} with {len} obs: {head}"
        return template.format(var=self.name, len=len(self), head=datahead)
コード例 #10
0
ファイル: abstractfactor.py プロジェクト: pm390/cortado
    def get_freq(self):
        """Count how often each level index occurs in the factor.

        Returns an ``int32`` array with one counter per entry of
        ``self.levels``; the factor is read in slices of ``SLICELEN``
        elements and every value increments the counter at its own index.
        """
        counts0 = np.zeros(len(self.levels), dtype=np.int32)
        slices = self.slicer(0, len(self), SLICELEN)

        @jit(nopython=True, cache=True)
        def tally(counts, chunk):
            for level in chunk:
                counts[level] += 1
            return counts

        return Seq.reduce(tally, counts0, slices)
コード例 #11
0
    def __repr__(self):
        """Describe the covariate by name, length and its first values.

        At most ``HEADLENGTH`` values are rendered; NaN values are shown as
        ``.`` and a trailing ``...`` marks that more values exist.
        """
        total = len(self)
        k = min(HEADLENGTH, total)

        def show(v):
            if np.isnan(v):
                return "."
            return str(v)

        def append_chunk(text, chunk):
            return text + ' '.join(map(show, chunk)) + " "

        datahead = Seq.reduce(append_chunk, "", self.slicer(0, k, HEADLENGTH))
        s = "..." if k != total else ""
        return "Covariate {cov} with {len} obs: {head}{s}".format(
            cov=self.name, len=total, head=datahead, s=s)
コード例 #12
0
ファイル: abstractfactor.py プロジェクト: pm390/cortado
    def __repr__(self):
        """Describe the factor by name, length, level count and first values.

        At most ``HEADLENGTH`` values are rendered as their level labels and a
        trailing ``...`` marks that more values exist. The reported level
        count is ``len(self.levels) - 1``.
        """
        total = len(self)
        k = min(HEADLENGTH, total)
        levels = self.levels

        def append_chunk(text, chunk):
            labels = [levels[i] for i in chunk]
            return text + ' '.join(labels) + " "

        datahead = Seq.reduce(append_chunk, "", self.slicer(0, k, HEADLENGTH))
        s = "..." if k != total else ""
        return "Factor {f} with {len} obs and {n} levels: {head}{s}".format(
            f=self.name, len=total, head=datahead, n=len(levels) - 1, s=s)
コード例 #13
0
ファイル: nfactorslicer.py プロジェクト: pm390/cortado
    def __call__(self, start, length, slicelen):
        """Return a sequence of 2-D arrays, one row per factor.

        The slicers of all factors are zipped over the same window. Each
        group of slices is copied into a shared ``(k, slicelen)`` buffer whose
        dtype is ``uint8`` when every factor slicer reports ``uint8`` and
        ``uint16`` otherwise. A full group yields the buffer itself; a shorter
        one yields a view of its first ``n`` columns.
        """
        factors = self.factors
        dtypes = [fac.slicer.dtype for fac in factors]
        dtype = np.uint8 if all(d == np.uint8 for d in dtypes) else np.uint16
        k = len(factors)
        buf = np.empty((k, slicelen), dtype=dtype)

        def pack(slices):
            n = len(slices[0])
            for i, src_dtype in enumerate(dtypes):
                chunk = slices[i]
                # Only convert rows whose source dtype differs from the buffer's.
                buf[i, :n] = chunk if src_dtype == dtype else chunk.astype(dtype)
            return buf if n == slicelen else buf[:, :n]

        per_factor = [fac.slicer(start, length, slicelen) for fac in factors]
        return Seq.map(pack, Seq.zip(*per_factor))
コード例 #14
0
    def unique(self):
        """Return the sorted distinct values collected by ``f_unique``.

        The data is read in slices of ``SLICELEN`` elements and folded with
        ``f_unique`` into a ``Dict`` keyed (and valued) by ``float32`` when
        the slicer dtype is ``float32`` and by ``float64`` otherwise. The keys
        of the resulting dict are returned as a sorted array of the slicer's
        dtype.
        """
        slices = self.slicer(0, len(self), SLICELEN)

        if self.slicer.dtype == np.float32:
            key_type = types.float32
        else:
            key_type = types.float64
        seen0 = Dict.empty(key_type=key_type, value_type=key_type)

        seen = Seq.reduce(f_unique, seen0, slices)

        values = np.array(list(seen.keys()), dtype=self.slicer.dtype)
        return np.sort(values)
コード例 #15
0
 def to_array(self):
     """Return the first slice produced when the whole object is one slice.

     The slicer is asked for a single slice spanning all ``len(self)``
     elements; the first item read from that sequence is returned.
     """
     n = len(self)
     first, _rest = Seq.try_read(self.slicer(0, n, n))
     return first
コード例 #16
0
ファイル: constfactor.py プロジェクト: pm390/cortado
 def slicer(start, length, slicelen):
     """Return a sequence of all-ones ``uint8`` slices covering the window.

     ``length`` is clamped to what remains after ``start`` and ``slicelen``
     to that length. Every yielded slice is a view of one shared buffer of
     ``slicelen`` ones, cut down to the size of the current chunk.
     """
     length = min(self._length - start, length)
     slicelen = min(length, slicelen)
     buf = np.ones(slicelen, dtype = np.uint8)

     def chunk(bounds):
         # NOTE(review): assumes next_slice_indices yields (from, to)
         # positions counted from the start of the data, as VectorSlicer
         # uses them - confirm. buf only holds slicelen elements, so it is
         # indexed by chunk size; indexing it with the absolute positions
         # would yield truncated or empty slices after the first chunk.
         return buf[:bounds[1] - bounds[0]]

     return Seq.map(chunk, Seq.from_next((start, length, slicelen), ch.next_slice_indices))
コード例 #17
0
ファイル: covariate.py プロジェクト: pm390/cortado
 def f(start, length, slicelen):
     """Map every slice from ``vslicer`` through ``g`` with a shared buffer.

     One ``float32`` buffer of ``slicelen`` elements is allocated per call
     and passed to ``g`` alongside each slice.
     """
     buf = np.empty(slicelen, dtype=np.float32)

     def convert(s):
         return g(s, buf)

     source = vslicer(start, length, slicelen)
     return Seq.map(convert, source)
コード例 #18
0
 def slice(start, length, slicelen):
     """Map every slice of ``basefactor`` through ``g_parse``.

     One ``float32`` buffer of ``slicelen`` elements is allocated per call
     and passed to ``g_parse`` together with each slice and ``parsed``.
     """
     buf = np.empty(slicelen, dtype = np.float32)

     def parse_chunk(chunk):
         return g_parse(chunk, buf, parsed)

     source = basefactor.slicer(start, length, slicelen)
     return Seq.map(parse_chunk, source)