Example #1
0
    def slicer(self):
        """Return a ``VectorSlicer`` over the materialised values of ``self.factor``.

        On the first call the slices produced by ``self.factor.slicer`` are
        copied, one after another, into a freshly allocated NumPy array that is
        then stored in ``self.cache``. Later calls reuse that array.

        The array is assigned to ``self.cache`` only after it has been filled
        completely. ``np.empty`` hands back uninitialised memory, so publishing
        it earlier would mean that a failure part-way through the copy leaves a
        non-``None`` cache whose garbage contents every later call would serve.

        Returns:
            A new ``VectorSlicer`` wrapping the cached array.
        """
        if self.cache is None:
            buffer = np.empty(len(self), dtype=self.factor.slicer.dtype)
            chunks = self.factor.slicer(0, len(self.factor), SLICELEN)

            def copy_chunk(offset, chunk):
                # Write the chunk at the running offset and hand back the
                # offset at which the next chunk has to start.
                end = offset + len(chunk)
                buffer[offset:end] = chunk
                return end

            Seq.reduce(copy_chunk, 0, chunks)
            # Publish the cache only now that every chunk has been copied.
            self.cache = buffer
        return VectorSlicer(self.cache)
Example #2
0
def xgblogit(label,
             factors,
             trainselector=None,
             mu=0.5,
             eta=0.1,
             lambda_=1.0,
             gamma=0.0,
             maxdepth=2,
             nrounds=2,
             minh=1.0,
             slicelen=10000):
    """Grow ``nrounds`` trees one after another and return them with the final fit.

    The raw prediction vector starts as a float32 array filled with
    ``logitraw(mu)``. Every round refreshes the shared ``g`` and ``h`` buffers
    (via ``get_gh_sel`` when ``trainselector`` is given, ``get_gh`` otherwise),
    wraps them as covariates, and calls ``growtree``, which yields the new tree
    and the next raw prediction vector.

    Args:
        label: Object supporting ``len()`` and ``to_array()``.
        factors: Forwarded to ``growtree``.
        trainselector: Optional selector; when not ``None`` it is passed to
            ``get_gh_sel`` instead of calling ``get_gh``.
        mu: Value passed through ``logitraw`` to initialise the predictions.
        eta, lambda_, gamma, maxdepth, minh, slicelen: Forwarded unchanged to
            ``growtree``.
        nrounds: Number of rounds, i.e. number of trees grown.

    Returns:
        Tuple ``(trees, fm)``: the list of grown trees and the final prediction
        array after ``get_pred(fm)`` has been called on it (presumably an
        in-place transform, since its return value is ignored -- confirm).
    """
    nobs = len(label)
    initial_raw = np.full(nobs, logitraw(mu), dtype=np.float32)
    label = label.to_array()
    # g and h are allocated once and overwritten in every round.
    g = np.zeros(nobs, dtype=np.float32)
    h = np.zeros(nobs, dtype=np.float32)

    def boost_round(state, _round):
        raw, grown = state
        if trainselector is None:
            get_gh(raw, label, g, h)
        else:
            get_gh_sel(trainselector, raw, label, g, h)
        tree, next_raw = growtree(factors,
                                  Covariate.from_array(g),
                                  Covariate.from_array(h),
                                  raw, eta, maxdepth,
                                  lambda_, gamma, minh, slicelen)
        grown.append(tree)
        return (next_raw, grown)

    rounds = Seq.from_gen((i for i in range(nrounds)))
    fm, trees = Seq.reduce(boost_round, (initial_raw, []), rounds)
    get_pred(fm)
    return trees, fm
Example #3
0
def get_hist_slice(gsum0, hsum0, nodeids, nodecansplit, factor, gcovariate, hcovariate,
                   start, length, slicelen):
    """Fold ``f_hist`` over aligned slices of node ids, factor and covariates.

    All four inputs are sliced with the same ``(start, length, slicelen)``
    window, zipped together, and reduced with ``f_hist`` starting from the
    state ``(gsum0, hsum0, nodecansplit)``.

    Returns:
        Whatever ``Seq.reduce`` yields for ``f_hist`` over the zipped slices.
    """
    window = (start, length, slicelen)
    zipped = Seq.zip(
        VectorSlicer(nodeids)(*window),
        factor.slicer(*window),
        gcovariate.slicer(*window),
        hcovariate.slicer(*window),
    )
    initial_state = (gsum0, hsum0, nodecansplit)
    return Seq.reduce(f_hist, initial_state, zipped)
Example #4
0
    def __repr__(self):
        """Describe the variate by name, length and its first values.

        At most ``HEADLENGTH`` leading observations are rendered, each via
        ``str`` and followed by a single space.
        """
        head_slices = self.slicer(0, min(HEADLENGTH, len(self)), HEADLENGTH)

        def append_values(text, chunk):
            rendered = ' '.join(map(str, chunk))
            return text + rendered + " "

        datahead = Seq.reduce(append_values, "", head_slices)
        template = "BoolVariate {var} with {len} obs: {head}"
        return template.format(var=self.name, len=len(self), head=datahead)
Example #5
0
    def get_freq(self):
        """Count how often each level index occurs across all observations.

        The observations are read slice by slice (``SLICELEN`` at a time) and
        every value is used as an index into an int32 counter array that has
        one entry per element of ``self.levels``.

        Returns:
            The result of reducing the slices into the counter array
            (presumably that same int32 array -- depends on ``Seq.reduce``).
        """
        chunks = self.slicer(0, len(self), SLICELEN)
        tallies = np.zeros(len(self.levels), dtype=np.int32)

        # NOTE(review): this jitted kernel is re-created on every call;
        # hoisting it to module level would avoid rebuilding the dispatcher.
        @jit(nopython=True, cache=True)
        def tally(counts, chunk):
            for level in chunk:
                counts[level] += 1
            return counts

        return Seq.reduce(tally, tallies, chunks)
Example #6
0
    def __repr__(self):
        """Describe the factor by name, length, level count and leading values.

        Up to ``HEADLENGTH`` leading observations are shown as their level
        labels; ``"..."`` is appended when the factor holds more observations
        than were rendered. The level count is reported as
        ``len(self.levels) - 1`` (presumably one entry is reserved, e.g. for
        missing values -- confirm).
        """
        head_count = min(HEADLENGTH, len(self))
        head_slices = self.slicer(0, head_count, HEADLENGTH)
        levels = self.levels

        def append_labels(text, chunk):
            labels = [levels[index] for index in chunk]
            return text + ' '.join(labels) + " "

        datahead = Seq.reduce(append_labels, "", head_slices)
        if head_count == len(self):
            suffix = ""
        else:
            suffix = "..."
        template = "Factor {f} with {len} obs and {n} levels: {head}{s}"
        return template.format(f=self.name,
                               len=len(self),
                               head=datahead,
                               n=len(levels) - 1,
                               s=suffix)
Example #7
0
    def __repr__(self):
        """Describe the covariate by name, length and its leading values.

        Up to ``HEADLENGTH`` leading observations are shown; NaN values are
        rendered as ``"."``. ``"..."`` is appended when the covariate holds
        more observations than were rendered.
        """
        head_count = min(HEADLENGTH, len(self))
        head_slices = self.slicer(0, head_count, HEADLENGTH)

        def render(value):
            if np.isnan(value):
                return "."
            return str(value)

        def append_values(text, chunk):
            return text + ' '.join(map(render, chunk)) + " "

        datahead = Seq.reduce(append_values, "", head_slices)
        if head_count == len(self):
            suffix = ""
        else:
            suffix = "..."
        template = "Covariate {cov} with {len} obs: {head}{s}"
        return template.format(cov=self.name,
                               len=len(self),
                               head=datahead,
                               s=suffix)
Example #8
0
    def unique(self):
        """Return the distinct values of this variable as a sorted array.

        The observations are read slice by slice (``SLICELEN`` at a time) and
        folded with ``f_unique`` into a typed dictionary whose keys act as the
        set of seen values. Which values ``f_unique`` records (for example
        whether NaNs are skipped) is decided by that helper.

        Returns:
            A sorted NumPy array of the dictionary's keys, using the slicer's
            dtype.
        """
        slices = self.slicer(0, len(self), SLICELEN)

        # Key and value share one numeric type: float32 when the data is
        # float32, float64 in every other case.
        if self.slicer.dtype == np.float32:
            key_type = types.float32
        else:
            key_type = types.float64
        seen = Dict.empty(key_type=key_type, value_type=key_type)

        collected = Seq.reduce(f_unique, seen, slices)

        values = np.array(list(collected.keys()), dtype=self.slicer.dtype)
        return np.sort(values)