Beispiel #1
0
 def groupby3(arg):
     """Return one minus the sum of squared relative frequencies of *arg*.

     Equal values are counted by sorting *arg* and measuring the length of
     each run yielded by ``groupby``.
     """
     values = as_num_array(arg)
     total = float(len(values))
     run_lengths = [len(list(run)) for _key, run in groupby(sort(values))]
     fractions = as_num_array(run_lengths) / total
     # square every fraction in place
     fractions *= fractions
     return 1.0 - fractions.sum()
Beispiel #2
0
 def isort(dep,indep,cutpoints=None,**kwargs):
   """Score every cutpoint with ``gini2_counts`` after sorting by *indep*.

   *dep* and *indep* are converted with ``as_num_array``.  When *cutpoints*
   is None it is taken from ``midpoints_integer(indep)``.  Returns a list
   with one ``gini2_counts`` result per cutpoint, or ``[]`` when there are
   no cutpoints.  Extra keyword arguments are accepted and ignored.

   NOTE(review): the boundary scan below appears to assume *cutpoints* is
   in ascending order -- confirm against the callers.
   """
   dep = as_num_array(dep)
   indep = as_num_array(indep)
   if cutpoints is None:
     cutpoints = midpoints_integer(indep)
   if not len(cutpoints):
     return []
   # sort both vectors by *indep*
   idx = argsort(indep)
   dep = take(dep,idx)
   indep = take(indep,idx)
   # cutidx holds slice boundaries into the sorted vectors; cutidx[-1] is the
   # boundary currently being extended and is compared against
   # cutpoints[len(cutidx)-2]
   cutidx = [0,0]
   for ival,isub in it.groupby(indep):
     # ilen = number of consecutive equal *indep* values in this run
     ilen = len(list(isub))
     if ival < cutpoints[len(cutidx)-2]:
       # run lies below the current cutpoint: grow the current boundary
       cutidx[-1] += ilen
     else:
       # run reaches the current cutpoint: stop once every cutpoint already
       # has a boundary, otherwise open the next boundary with this run
       if len(cutidx) > len(cutpoints):
         break
       cutidx.append(cutidx[-1]+ilen)
   assert len(cutidx)-1 == len(cutpoints), '%s != %s' % (len(cutidx)-1,len(cutpoints))
   out = []
   # cnt2 starts with the counts of all of *dep*, cnt1 with zero for every
   # key; histo_tuple is consumed here as (value, count) pairs
   cnt2 = dict(histo_tuple(dep))
   cnt1 = dict.fromkeys(cnt2.keys(),0)
   for i1,i2 in izip(cutidx[:-1],cutidx[1:]):
     # update the counts from the last cut: move dep[i1:i2] from cnt2 to cnt1
     for d,cnt in histo_tuple(dep[i1:i2]):
       cnt1[d] += cnt
       cnt2[d] -= cnt
     # calculate results based on counts (zero counts are dropped)
     a1 = as_num_array([val for val in cnt1.itervalues() if val != 0])
     a2 = as_num_array([val for val in cnt2.itervalues() if val != 0])
     out.append(gini2_counts(a1,a2))
   assert len(out) == len(cutpoints), '%s != %s' % (len(out),len(cutpoints))
   return out
Beispiel #3
0
 def vector1(arg1, arg2):
     """Return the total-weighted combination of ``gini_counts`` for two vectors.

     Each vector's ``gini_counts`` value is weighted by that vector's sum
     divided by the sum of both vectors.
     """
     left = as_num_array(arg1)
     right = as_num_array(arg2)
     left_total = left.sum()
     right_total = right.sum()
     grand_total = float(left_total + right_total)
     left_term = gini_counts(left) * left_total / grand_total
     right_term = gini_counts(right) * right_total / grand_total
     return left_term + right_term
Beispiel #4
0
 def missing1(arg,delta=1,first=False):
   """For every value in *arg*, look up an index for ``value + delta``.

   Runs of equal consecutive values in *arg* are mapped to the index of
   their first element (*first* true) or their last element (otherwise).
   Keys ``key + delta`` that do not occur in *arg* are filled in from a
   smaller existing key (see the loop below).  Returns a list with one
   index per element of *arg*.

   NOTE(review): using arg[0]/arg[-1] as min/max key suggests *arg* is
   expected to be sorted ascending -- confirm against the callers.
   """
   # build answer lookup mapping each distinct run value to its first or
   # last index, depending on *first*
   run_lens = [(k,len(list(g))) for k,g in groupby(arg)]
   keys = as_num_array([k for k,l in run_lens])
   lens = as_num_array([l for k,l in run_lens])
   ends = cumsum(lens)
   starts = ends - lens
   if first:
     answer = dict(izip(keys,starts))
   else:
     answer = dict(izip(keys,ends-1))
   # identify missing keys: shifted keys that have no entry in answer yet
   need = keys + delta
   needset = set(need)
   haveset = set(answer)
   fillset = needset.difference(haveset)
   fill = as_num_array(sorted(fillset))
   # first and last element of arg serve as smallest and largest key
   minkey,maxkey = arg[0],arg[-1]
   # walk both the existing keys and the missing keys in reverse order
   have_iter = iter(keys[-1::-1])
   fill_iter = iter(fill[-1::-1])
   thiskey = maxkey
   thisval = answer[thiskey]
   for fillkey in fill_iter:
     if thiskey >= fillkey:
       # advance to the next existing key strictly below fillkey; when the
       # existing keys are exhausted fall back to minkey
       try:
         thiskey = dropwhile(lambda x:x>=fillkey,have_iter).next()
       except StopIteration:
         thiskey = minkey
       thisval = answer[thiskey]
     # the missing key inherits the index of the key found above
     answer[fillkey] = thisval
   out = [answer[val+delta] for val in arg]
   return out
Beispiel #5
0
 def vector1(arg1,arg2):
   """Combine ``gini_counts`` of two vectors, each weighted by its share of the total."""
   first = as_num_array(arg1)
   second = as_num_array(arg2)
   first_sum = first.sum()
   second_sum = second.sum()
   # float() keeps the two divisions below from truncating
   both = float(first_sum+second_sum)
   weighted_first = gini_counts(first)*first_sum/both
   weighted_second = gini_counts(second)*second_sum/both
   return weighted_first + weighted_second
Beispiel #6
0
 def deltacnt1(dep,indep,cutpoints=None,**kwargs):
   """Score each cutpoint with ``gini2_counts`` using incrementally updated counts.

   Two count dictionaries are kept: one for elements whose *indep* value
   fell below some cut seen so far, one for the rest.  For every cut only
   the elements not moved by an earlier cut are transferred.  Returns a
   list with one result per cutpoint; extra keyword arguments are ignored.

   NOTE(review): elements are never moved back, so the result per cut only
   reflects that cut alone if *cutpoints* is non-decreasing -- confirm.
   """
   dep = as_num_array(dep)
   indep = as_num_array(indep)
   if cutpoints is None:
     cutpoints = midpoints_integer(indep)
   # counts on the "not yet moved" side start with everything in dep
   upper_counts = dict(histo_tuple(dep))
   lower_counts = dict.fromkeys(upper_counts.keys(),0)
   # elementwise indep != indep is the initial "already moved" mask
   moved = (indep != indep)
   scores = []
   for cut in cutpoints:
     below = indep < cut
     # only elements that were not moved by an earlier cut
     fresh = below & ~moved
     moved |= below
     for label in dep[nonzero(fresh)[0]]:
       lower_counts[label] += 1
       upper_counts[label] -= 1
     # zero counts are left out of the vectors passed on
     lower = as_num_array([c for c in lower_counts.itervalues() if c != 0])
     upper = as_num_array([c for c in upper_counts.itervalues() if c != 0])
     scores.append(gini2_counts(lower,upper))
   return scores
Beispiel #7
0
 def deltacnt1(dep, indep, cutpoints=None, **kwargs):
     """Return one ``gini2_counts`` value per cutpoint from running counts.

     Counts of *dep* values are shifted from a "remaining" dictionary to a
     "below the cut" dictionary as each cut is processed; only elements not
     shifted by an earlier cut are touched.  Extra keyword arguments are
     ignored.

     NOTE(review): shifted elements are never returned, which presumably
     relies on *cutpoints* being non-decreasing -- confirm.
     """
     dep = as_num_array(dep)
     indep = as_num_array(indep)
     if cutpoints is None:
         cutpoints = midpoints_integer(indep)
     remaining = dict(histo_tuple(dep))
     taken = dict.fromkeys(remaining.keys(), 0)
     # initial mask of already-shifted elements: elementwise indep != indep
     done = (indep != indep)
     results = []
     for cut in cutpoints:
         under = indep < cut
         newly_under = under & ~done
         done |= under
         positions = nonzero(newly_under)[0]
         for key in dep[positions]:
             taken[key] += 1
             remaining[key] -= 1
         # build the count vectors without zero entries
         taken_vec = as_num_array(
             [n for n in taken.itervalues() if n != 0])
         remaining_vec = as_num_array(
             [n for n in remaining.itervalues() if n != 0])
         results.append(gini2_counts(taken_vec, remaining_vec))
     return results
Beispiel #8
0
 def groupby3(arg):
   """Return 1.0 minus the sum of squared relative frequencies of the values in *arg*."""
   data = as_num_array(arg)
   size = float(len(data))
   # one count per run of equal values in the sorted data
   counts = [len(list(members)) for _value,members in groupby(sort(data))]
   shares = as_num_array(counts)/size
   shares *= shares
   return 1.0 - shares.sum()
Beispiel #9
0
 def naive2(arg,sel=None,step=1):
   """Return max minus min of each segment ``arg[j+1:k+1]``.

   The segment bounds j and k are index pairs that lie *step* positions
   apart in the index vector returned by ``arg_sel_step_to_idx``.
   """
   arg = as_num_array(arg)
   idx = arg_sel_step_to_idx(arg,sel,step)
   segments = [arg[lo+1:hi+1] for lo,hi in izip(idx[:-step],idx[step:])]
   lows = [seg.min() for seg in segments]
   highs = [seg.max() for seg in segments]
   return as_num_array(highs) - lows
Beispiel #10
0
 def naive2(arg, sel=None, step=1):
     """Compute the value range (max - min) of consecutive index segments.

     Index pairs (j, k) are taken *step* apart from the result of
     ``arg_sel_step_to_idx``; each segment is ``arg[j + 1:k + 1]``.
     """
     arg = as_num_array(arg)
     idx = arg_sel_step_to_idx(arg, sel, step)
     starts = idx[:-step]
     stops = idx[step:]
     pieces = [arg[a + 1:b + 1] for a, b in izip(starts, stops)]
     minima = [piece.min() for piece in pieces]
     maxima = [piece.max() for piece in pieces]
     return as_num_array(maxima) - minima
Beispiel #11
0
 def smart(dep,indep,cutpoints=None,**kwargs):
   """Dispatch to ``PartitionIntegerGini.naive`` or ``.isort`` by input size.

   Inputs with fewer than 100 elements in *dep* go to ``naive``; all
   others go to ``isort``.  Default cutpoints are computed here and
   forwarded together with any extra keyword arguments.
   """
   # not needed unless high penalty for small datasets
   dep = as_num_array(dep)
   indep = as_num_array(indep)
   if cutpoints is None:
     cutpoints = midpoints_integer(indep)
   if len(dep) < 100:
     impl = PartitionIntegerGini.naive
   else:
     impl = PartitionIntegerGini.isort
   return impl(dep,indep,cutpoints=cutpoints,**kwargs)
Beispiel #12
0
 def fast(arg1, arg2, reset_value=0.0, out=None):
     """Feed ``log(arg2 / arg1)`` through ``CusumReset().iterfunc``.

     *arg1* and *arg2* are converted with ``as_num_array``.  The result is
     written into *out*; when *out* is None a new array is obtained from
     ``arg1.new()``.  Returns *out*.
     """
     arg1 = as_num_array(arg1)
     arg2 = as_num_array(arg2)
     # Compare against None explicitly: `not out` would evaluate the truth
     # value of a caller-supplied array, which is ambiguous for arrays.
     if out is None:
         out = arg1.new()
     cusum_func = CusumReset().iterfunc
     log_odds = log(arg2 / arg1)
     cusum_func(log_odds, reset_value=reset_value, out=out)
     return out
Beispiel #13
0
 def fast(arg1, arg2, reset_value=0.0, out=None):
     """Run ``CusumReset().iterfunc`` over the element-wise ``log(arg2 / arg1)``.

     Both inputs are converted with ``as_num_array``.  Output goes into
     *out*, which is allocated via ``arg1.new()`` when not supplied, and
     *out* is returned.
     """
     arg1 = as_num_array(arg1)
     arg2 = as_num_array(arg2)
     # `out is None` instead of `not out`: the truth value of an array
     # passed by the caller is ambiguous and must not be tested.
     if out is None:
         out = arg1.new()
     cusum_func = CusumReset().iterfunc
     log_odds = log(arg2 / arg1)
     cusum_func(log_odds, reset_value=reset_value, out=out)
     return out
Beispiel #14
0
 def naive1(arg,sel=None,step=1):
   """Return max minus min for every non-empty segment ``arg[j+1:k+1]``.

   Index pairs (j, k) lie *step* apart in the result of
   ``arg_sel_step_to_idx``.  Pairs with j == k and segments of length
   zero are skipped, so the result may be shorter than the pair list.
   """
   arg = as_num_array(arg)
   idx = arg_sel_step_to_idx(arg,sel,step)
   kept = []
   for lo,hi in izip(idx[:-step],idx[step:]):
     if lo == hi:
       continue
     piece = arg[lo+1:hi+1]
     if len(piece):
       kept.append(piece)
   lows = [piece.min() for piece in kept]
   highs = [piece.max() for piece in kept]
   return as_num_array(highs) - lows
Beispiel #15
0
 def naive1(arg, sel=None, step=1):
     """Compute max - min over the non-empty segments between index pairs.

     Pairs (j, k) come from ``arg_sel_step_to_idx`` taken *step* apart; a
     segment is ``arg[j + 1:k + 1]``.  Pairs with equal indices and empty
     segments contribute nothing to the result.
     """
     arg = as_num_array(arg)
     idx = arg_sel_step_to_idx(arg, sel, step)
     bounds = izip(idx[:-step], idx[step:])
     candidates = [arg[a + 1:b + 1] for a, b in bounds if a != b]
     usable = [chunk for chunk in candidates if len(chunk)]
     minima = [chunk.min() for chunk in usable]
     maxima = [chunk.max() for chunk in usable]
     return as_num_array(maxima) - minima
Beispiel #16
0
 def naive2(arg,sel=None,step=1,func=sum):
   """Apply *func* to each segment ``arg[j+1:k+1]`` and return the results as an array.

   The (j, k) pairs are entries *step* apart in the index vector from
   ``arg_sel_step_to_idx``.
   """
   arg = as_num_array(arg)
   idx = arg_sel_step_to_idx(arg,sel,step)
   bounds = izip(idx[:-step],idx[step:])
   return as_num_array([func(arg[lo+1:hi+1]) for lo,hi in bounds])
Beispiel #17
0
 def naive1(arg,sel=None,step=1,func=sum):
   """Apply *func* to each segment between index entries *step* apart.

   For every position i, the segment is ``arg[idx[i]+1:idx[i+step]+1]``
   where idx comes from ``arg_sel_step_to_idx``.  Results are returned as
   an array.
   """
   arg = as_num_array(arg)
   idx = arg_sel_step_to_idx(arg,sel,step)
   npairs = len(idx)-step
   results = [func(arg[idx[i]+1:idx[i+step]+1]) for i in xrange(npairs)]
   return as_num_array(results)
Beispiel #18
0
 def naive1(arg, sel=None, step=1, func=sum):
     """Reduce each index-delimited segment of *arg* with *func*.

     Segment i spans ``arg[idx[i] + 1:idx[i + step] + 1]`` with idx taken
     from ``arg_sel_step_to_idx``.  Returns the reductions as an array.
     """
     arg = as_num_array(arg)
     idx = arg_sel_step_to_idx(arg, sel, step)
     reduced = []
     for pos in xrange(len(idx) - step):
         lo = idx[pos] + 1
         hi = idx[pos + step] + 1
         reduced.append(func(arg[lo:hi]))
     return as_num_array(reduced)
Beispiel #19
0
 def naive2(arg, sel=None, step=1, func=sum):
     """Reduce every segment ``arg[j + 1:k + 1]`` with *func*.

     j and k are paired from the index vector of ``arg_sel_step_to_idx``
     at a distance of *step*.  Returns the reductions as an array.
     """
     arg = as_num_array(arg)
     idx = arg_sel_step_to_idx(arg, sel, step)
     lower = idx[:-step]
     upper = idx[step:]
     reduced = [func(arg[a + 1:b + 1]) for a, b in izip(lower, upper)]
     return as_num_array(reduced)
Beispiel #20
0
 def masksel(dep,indep,cutpoints=None,**kwargs):
   """Score each cutpoint by splitting *dep* with a boolean mask on *indep*.

   For every cut, *dep* is split into the elements where ``indep < cut``
   and the rest, and both parts are passed to ``gini2``.  Returns a list
   with one result per cutpoint; extra keyword arguments are ignored.
   """
   dep = as_num_array(dep)
   indep = as_num_array(indep)
   if cutpoints is None:
     cutpoints = midpoints_integer(indep)
   def split_score(cut):
     below = indep < cut
     return gini2(dep[below],dep[~below])
   return [split_score(cut) for cut in cutpoints]
Beispiel #21
0
 def masksel(dep, indep, cutpoints=None, **kwargs):
     """Return ``gini2`` of the two mask-selected halves of *dep* per cutpoint.

     The mask is ``indep < cut``; its complement selects the second half.
     Cutpoints default to ``midpoints_integer(indep)``.  Extra keyword
     arguments are ignored.
     """
     dep = as_num_array(dep)
     indep = as_num_array(indep)
     if cutpoints is None:
         cutpoints = midpoints_integer(indep)
     scores = []
     for threshold in cutpoints:
         is_below = indep < threshold
         scores.append(gini2(dep[is_below], dep[~is_below]))
     return scores
Beispiel #22
0
 def idxsel(dep, indep, cutpoints=None, **kwargs):
     """Score each cutpoint by splitting *dep* through index arrays.

     For every cut the positions where ``indep < cut`` holds, and those
     where it does not, are obtained with ``nonzero``; the two selections
     of *dep* are passed to ``gini2``.  Returns one result per cutpoint.
     Extra keyword arguments are ignored.
     """
     dep = as_num_array(dep)
     indep = as_num_array(indep)
     if cutpoints is None:
         cutpoints = midpoints_integer(indep)

     def split_score(cut):
         below = indep < cut
         below_idx = nonzero(below)[0]
         rest_idx = nonzero(~below)[0]
         return gini2(dep[below_idx], dep[rest_idx])

     return [split_score(cut) for cut in cutpoints]
Beispiel #23
0
 def idxsel(dep,indep,cutpoints=None,**kwargs):
   """Return ``gini2`` of the index-selected halves of *dep* for each cutpoint.

   Positions are taken from ``nonzero`` of the mask ``indep < cut`` and of
   its complement.  Cutpoints default to ``midpoints_integer(indep)``.
   Extra keyword arguments are ignored.
   """
   dep = as_num_array(dep)
   indep = as_num_array(indep)
   if cutpoints is None:
     cutpoints = midpoints_integer(indep)
   scores = []
   for threshold in cutpoints:
     is_below = indep < threshold
     lower_pos = nonzero(is_below)[0]
     upper_pos = nonzero(~is_below)[0]
     scores.append(gini2(dep[lower_pos],dep[upper_pos]))
   return scores
Beispiel #24
0
 def smart(dep, indep, cutpoints=None, **kwargs):
     """Pick ``PartitionIntegerGini.naive`` or ``.isort`` from the length of *dep*.

     Fewer than 100 elements selects ``naive``, anything else ``isort``.
     Cutpoints default to ``midpoints_integer(indep)`` and are forwarded
     together with any extra keyword arguments.
     """
     # not needed unless high penalty for small datasets
     dep = as_num_array(dep)
     indep = as_num_array(indep)
     if cutpoints is None:
         cutpoints = midpoints_integer(indep)
     if len(dep) < 100:
         chosen = PartitionIntegerGini.naive
     else:
         chosen = PartitionIntegerGini.isort
     return chosen(dep, indep, cutpoints=cutpoints, **kwargs)
Beispiel #25
0
 def naive_comp(arg,out=None):
   """Store the differences of consecutive elements of *arg* in ``out[1:]``.

   When *out* is None a new array is obtained from ``arg.new()`` and its
   first element is set to 0.  Returns *out*.

   NOTE(review): a caller-supplied *out* keeps whatever it holds at index
   0 -- confirm that this is intended.
   """
   arg = as_num_array(arg)
   # `out is None` rather than `not out`: the truth value of an array
   # passed by the caller is ambiguous and must not be tested.
   if out is None:
     out = arg.new()
     out[0] = 0
   out[1:] = [arg[i]-arg[i-1] for i in xrange(1,len(arg))]
   return out
Beispiel #26
0
 def fast(arg,out=None):
   """Write ``arg[1:] - arg[:-1]`` into ``out[1:]`` with a single ``subtract`` call.

   When *out* is None a new array is obtained from ``arg.new()`` and its
   first element is set to 0.  Returns *out*.

   NOTE(review): a caller-supplied *out* keeps whatever it holds at index
   0 -- confirm that this is intended.
   """
   arg = as_num_array(arg)
   # `out is None` rather than `not out`: the truth value of an array
   # passed by the caller is ambiguous and must not be tested.
   if out is None:
     out = arg.new()
     out[0] = 0
   # third argument is the output slice that receives the differences
   subtract(arg[1:],arg[:-1],out[1:])
   return out
Beispiel #27
0
 def naive_comp(arg,out=None):
   """Fill ``out[1:]`` with ``arg[i] - arg[i-1]`` built by a list comprehension.

   A missing *out* is allocated via ``arg.new()`` and gets 0 at index 0.
   Returns *out*.

   NOTE(review): index 0 of a caller-supplied *out* is left untouched --
   confirm that this is intended.
   """
   arg = as_num_array(arg)
   # Explicit None check: `not out` would test the ambiguous truth value of
   # an array handed in by the caller.
   if out is None:
     out = arg.new()
     out[0] = 0
   out[1:] = [arg[i]-arg[i-1] for i in xrange(1,len(arg))]
   return out
Beispiel #28
0
 def fast(arg,out=None):
   """Compute consecutive differences of *arg* into ``out[1:]`` via ``subtract``.

   A missing *out* is allocated via ``arg.new()`` and gets 0 at index 0.
   Returns *out*.

   NOTE(review): index 0 of a caller-supplied *out* is left untouched --
   confirm that this is intended.
   """
   arg = as_num_array(arg)
   # Explicit None check: `not out` would test the ambiguous truth value of
   # an array handed in by the caller.
   if out is None:
     out = arg.new()
     out[0] = 0
   # the differences are written directly into the out[1:] slice
   subtract(arg[1:],arg[:-1],out[1:])
   return out
Beispiel #29
0
 def simple1(arg1,arg2):
   """Return ``Gini()`` of both inputs concatenated minus ``Gini2()`` of the pair.

   Returns 0.0 when either input has length zero.
   """
   whole_score = Gini()
   split_score = Gini2()
   parts = []
   for arg in (arg1,arg2):
     if len(arg):
       parts.append(as_num_array(arg))
   if len(parts) == 2:
     return whole_score(concatenate(parts)) - split_score(*parts)
   return 0.0
Beispiel #30
0
 def simple1(arg1, arg2):
     """Difference between ``Gini()`` of the joined inputs and ``Gini2()`` of both.

     If one of the inputs is empty the result is 0.0.
     """
     combined_measure = Gini()
     pair_measure = Gini2()
     non_empty = [as_num_array(vec) for vec in (arg1, arg2) if len(vec)]
     # both inputs must have survived the emptiness filter
     if len(non_empty) == 2:
         joined = concatenate(non_empty)
         return combined_measure(joined) - pair_measure(*non_empty)
     return 0.0
Beispiel #31
0
 def naive(arg,lower=None,upper=None):
   """Limit *arg* from below with ``maximum`` and from above with ``minimum``.

   A bound of None is skipped.  The lower bound is applied first.
   """
   result = as_num_array(arg)
   for bound,limiter in ((lower,maximum),(upper,minimum)):
     if bound is not None:
       result = limiter(result,bound)
   return result
Beispiel #32
0
 def naive_loop(arg,out=None):
   """Store ``arg[i] - arg[i-1]`` in ``out[i]`` for every i >= 1, one element at a time.

   When *out* is None a new array is obtained from ``arg.new()`` and its
   first element is set to 0.  Returns *out*.

   NOTE(review): a caller-supplied *out* keeps whatever it holds at index
   0 -- confirm that this is intended.
   """
   arg = as_num_array(arg)
   # `out is None` rather than `not out`: the truth value of an array
   # passed by the caller is ambiguous and must not be tested.
   if out is None:
     out = arg.new()
     out[0] = 0
   for i in xrange(1,len(arg)):
     out[i] = arg[i] - arg[i-1]
   return out
Beispiel #33
0
 def iterloop(arg, reset_value=0.0, out=None):
     """Compute a running sum of *arg* that never drops below *reset_value*.

     Each output element is ``max(reset_value, previous_output + value)``,
     with the running value starting at 0.0.  Results are written into
     *out* (allocated via ``arg.new()`` when None), which is returned.
     """
     arg = as_num_array(arg)
     # Explicit None check: `not out` would test the ambiguous truth value
     # of a caller-supplied array.
     if out is None:
         out = arg.new()
     last = 0.0
     for i, value in it.izip(it.count(), arg):
         # carry the running value forward; without this update every
         # element would be max(reset_value, value) and nothing accumulates
         last = max(reset_value, last + value)
         out[i] = last
     return out
Beispiel #34
0
 def _prep_testdata(self,*args,**kwargs):
   """Convert every positional argument into an array of its value counts.

   Each argument is tallied into a dictionary of occurrences per value;
   only the counts (``dict.values()``) are kept.  Keyword arguments are
   ignored.  Returns a list with one count array per argument.
   """
   prepared = []
   for seq in args:
     tally = {}
     for item in seq:
       tally[item] = tally.get(item,0) + 1
     prepared.append(as_num_array(tally.values()))
   return prepared
Beispiel #35
0
 def naive_loop(arg,out=None):
   """Element-by-element consecutive differences of *arg*, written to ``out[1:]``.

   A missing *out* is allocated via ``arg.new()`` and gets 0 at index 0.
   Returns *out*.

   NOTE(review): index 0 of a caller-supplied *out* is left untouched --
   confirm that this is intended.
   """
   arg = as_num_array(arg)
   # Explicit None check: `not out` would test the ambiguous truth value of
   # an array handed in by the caller.
   if out is None:
     out = arg.new()
     out[0] = 0
   for i in xrange(1,len(arg)):
     out[i] = arg[i] - arg[i-1]
   return out
Beispiel #36
0
 def iterloop(arg, reset_value=0.0, out=None):
     """Cumulative sum of *arg* that is clamped from below at *reset_value*.

     ``out[i]`` is ``max(reset_value, out[i-1] + arg[i])`` with a starting
     value of 0.0.  *out* is allocated via ``arg.new()`` when None and is
     returned.
     """
     arg = as_num_array(arg)
     # `out is None` instead of `not out`: an array's truth value is
     # ambiguous and must not be tested.
     if out is None:
         out = arg.new()
     last = 0.0
     for i, value in it.izip(it.count(), arg):
         # remember the clamped sum so the next element builds on it; the
         # running value was previously never updated
         last = max(reset_value, last + value)
         out[i] = last
     return out
Beispiel #37
0
 def groupby2(arg):
   """Return 1.0 minus the squared relative frequency of each distinct value in *arg*.

   Frequencies come from the run lengths of ``groupby`` over the sorted
   input; the squares are subtracted one after another.
   """
   arg = as_num_array(arg)
   n = float(len(arg))
   shares = []
   for _value,members in groupby(sorted(arg)):
     shares.append(len(list(members))/n)
   result = 1.0
   for share in shares:
     result -= share * share
   return result
Beispiel #38
0
 def groupby2(arg):
     """Subtract the squared relative frequency of every distinct value from 1.0.

     Distinct values are found as runs of ``groupby`` over ``sorted(arg)``.
     """
     arg = as_num_array(arg)
     size = float(len(arg))
     fractions = [len(list(run)) / size
                  for _key, run in groupby(sorted(arg))]
     remainder = 1.0
     for fraction in fractions:
         remainder -= fraction * fraction
     return remainder
Beispiel #39
0
 def _prep_testdata(self, *args, **kwargs):
     """Return, for each positional argument, an array of occurrence counts.

     Values are counted in a dictionary and only the counts are kept, in
     the order given by ``dict.values()``.  Keyword arguments are ignored.
     """
     count_arrays = []
     for values in args:
         occurrences = {}
         for value in values:
             occurrences[value] = occurrences.get(value, 0) + 1
         count_arrays.append(as_num_array(occurrences.values()))
     return count_arrays
Beispiel #40
0
 def fast(arg,boundaries=[0,100,1000],values=None):
   """Map *arg* to bin indices via ``searchsorted`` and optionally to bin values.

   Without *values* the indices from ``searchsorted(boundaries, arg)`` are
   returned.  With *values* (which must have one more entry than
   *boundaries*) the entry of *values* at each index is returned.

   The default *boundaries* list is shared between calls; this function
   does not modify it.
   """
   assert len(boundaries), "at least one boundary is required"
   if values is None:
     return searchsorted(boundaries,arg)
   assert len(boundaries)+1 == len(values), "len(values) must be len(boundaries)+1, (%s,%s)" % (len(values),len(boundaries))
   bins = searchsorted(boundaries,arg)
   lookup = as_num_array(values)
   return lookup.take(bins)
Beispiel #41
0
 def presort(dep,indep,cutpoints=None,dep_sorted=False):
   """Score each cutpoint with ``gini2_presorted`` on *dep* sorted once up front.

   Unless *dep_sorted* is true, both vectors are reordered by
   ``argsort(dep)``.  For every cut, *dep* is split by the positions where
   ``indep < cut`` holds and where it does not.  Returns one result per
   cutpoint.
   """
   dep = as_num_array(dep)
   indep = as_num_array(indep)
   if not dep_sorted:
     order = argsort(dep)
     dep = take(dep,order)
     indep = take(indep,order)
   if cutpoints is None:
     cutpoints = midpoints_integer(indep)
   def split_score(cut):
     below = indep < cut
     below_idx = nonzero(below)[0]
     rest_idx = nonzero(~below)[0]
     return gini2_presorted(dep[below_idx],dep[rest_idx])
   return [split_score(cut) for cut in cutpoints]
Beispiel #42
0
 def presort(dep, indep, cutpoints=None, dep_sorted=False):
     """Return ``gini2_presorted`` of both halves of *dep* for every cutpoint.

     Both vectors are first put in ``argsort(dep)`` order unless
     *dep_sorted* is true.  The halves are the index selections for
     ``indep < cut`` and for its complement.
     """
     dep = as_num_array(dep)
     indep = as_num_array(indep)
     if not dep_sorted:
         by_dep = argsort(dep)
         dep = take(dep, by_dep)
         indep = take(indep, by_dep)
     if cutpoints is None:
         cutpoints = midpoints_integer(indep)
     scores = []
     for threshold in cutpoints:
         is_below = indep < threshold
         lower_pos = nonzero(is_below)[0]
         upper_pos = nonzero(~is_below)[0]
         scores.append(gini2_presorted(dep[lower_pos], dep[upper_pos]))
     return scores
Beispiel #43
0
 def naive_loop(arg,first=False):
   """Flag the boundaries of runs of equal consecutive values in *arg*.

   With *first* true the first element of each run is flagged (index 0 is
   always flagged); otherwise the last element of each run is flagged
   (the final position is always flagged).  Returns a 'Bool' array.
   """
   arg = as_num_array(arg)
   def changed(a,b):
     if a != b:
       return 1
     return 0
   n = len(arg)
   if first:
     flags = [1] + [changed(arg[i],arg[i+1]) for i in xrange(n-1)]
   else:
     flags = [changed(arg[i],arg[i-1]) for i in xrange(1,n)] + [1]
   return as_num_array(flags,type='Bool')
Beispiel #44
0
 def groupby1(arg):
   """Return 1.0 minus the sum of squared relative frequencies of the values in *arg*.

   A (value, count) histogram is built from ``groupby`` over the sorted
   input; each squared share is subtracted in turn.
   """
   arg = as_num_array(arg)
   histogram = []
   for value,members in groupby(sorted(arg)):
     histogram.append((value,len(list(members))))
   total = float(len(arg))
   result = 1.0
   for _value,count in histogram:
     share = count/total
     result -= share * share
   return result
Beispiel #45
0
 def gsl(arg, mean=0.0, variance=0.0, out=None):
     """Evaluate ``gaussian_pdf`` on *arg* shifted by *mean*.

     The result is copied into *out* (allocated via ``arg.new()`` when
     None), which is returned.  *variance* is passed to ``gaussian_pdf``
     unchanged.

     NOTE(review): whether ``gaussian_pdf`` expects a variance or a
     standard deviation as its second argument is not visible here --
     confirm.
     """
     arg = as_num_array(arg)
     # Explicit None check: `not out` would test the ambiguous truth value
     # of a caller-supplied array.
     if out is None:
         out = arg.new()
     # Centre the data only when there is a shift to apply.  The test used
     # to be inverted, so a non-zero mean was silently ignored.
     if mean != 0.0:
         out[:] = gaussian_pdf(arg - mean, variance)
     else:
         out[:] = gaussian_pdf(arg, variance)
     return out
Beispiel #46
0
 def gsl(arg,mean=0.0,variance=0.0,out=None):
   """Fill *out* with ``gaussian_pdf`` of *arg* minus *mean*.

   *out* is allocated via ``arg.new()`` when None and is returned.
   *variance* is handed to ``gaussian_pdf`` unchanged.

   NOTE(review): whether ``gaussian_pdf`` expects a variance or a standard
   deviation as its second argument is not visible here -- confirm.
   """
   arg = as_num_array(arg)
   # `out is None` instead of `not out`: an array's truth value is
   # ambiguous and must not be tested.
   if out is None:
     out = arg.new()
   # Subtract the mean only when it is non-zero.  The condition used to be
   # inverted, which dropped every non-zero mean.
   if mean != 0.0:
     out[:] = gaussian_pdf(arg-mean,variance)
   else:
     out[:] = gaussian_pdf(arg,variance)
   return out
Beispiel #47
0
 def groupby1(arg):
     """Subtract each distinct value's squared relative frequency from 1.0.

     Counts are taken from the run lengths of ``groupby`` over
     ``sorted(arg)``.
     """
     arg = as_num_array(arg)
     counts = [len(list(run)) for _key, run in groupby(sorted(arg))]
     size = float(len(arg))
     remainder = 1.0
     for count in counts:
         fraction = count / size
         remainder -= fraction * fraction
     return remainder
Beispiel #48
0
 def naive_loop(arg, first=False):
     """Mark where runs of equal consecutive values in *arg* start or end.

     *first* true: index 0 and every element that differs from its
     predecessor are marked.  *first* false: every element that differs
     from its successor and the final position are marked.  The marks are
     returned as a 'Bool' array.
     """
     arg = as_num_array(arg)

     def differs(a, b):
         if a != b:
             return 1
         return 0

     length = len(arg)
     if first:
         marks = [1]
         marks.extend([differs(arg[i], arg[i + 1])
                       for i in xrange(length - 1)])
     else:
         marks = [differs(arg[i], arg[i - 1])
                  for i in xrange(1, length)]
         marks.append(1)
     return as_num_array(marks, type='Bool')
Beispiel #49
0
 def naive_loop(arg,out=None):
   """Replace each zero in *arg* by the most recent non-zero value before it.

   Zeros ahead of the first non-zero element stay 0.  The result is
   written into *out* (allocated via ``arg.new()`` when None), which is
   returned.
   """
   arg = as_num_array(arg)
   # `out is None` rather than `not out`: the truth value of an array
   # passed by the caller is ambiguous and must not be tested.
   if out is None:
     out = arg.new()
   last = 0
   for i in xrange(len(arg)):
     if arg[i] != 0:
       last = arg[i]
     out[i] = last
   return out
Beispiel #50
0
 def fast(arg,first=False):
   """Vectorised run-boundary mask over *arg*, returned as a 'Bool' array.

   Starts from an all-ones mask and subtracts the positions where an
   element equals its neighbour: the predecessor when *first* is true,
   the successor otherwise.

   NOTE(review): relies on the array library's in-place subtraction on
   'Bool' arrays to clear those positions -- confirm.
   """
   arg = as_num_array(arg)
   mask = ones(len(arg),type='Bool')
   if not first:
     same = arg[:-1] == arg[1:]
     mask[:-1] -= same
     return mask
   same = arg[1:] == arg[:-1]
   mask[1:] -= same
   return mask
Beispiel #51
0
 def naive_loop(arg,out=None):
   """Carry the last non-zero value of *arg* forward over following zeros.

   Until a non-zero element has been seen the output is 0.  *out* is
   allocated via ``arg.new()`` when None and is returned.
   """
   arg = as_num_array(arg)
   # Explicit None check: `not out` would test the ambiguous truth value of
   # an array handed in by the caller.
   if out is None:
     out = arg.new()
   last = 0
   for i in xrange(len(arg)):
     if arg[i] != 0:
       last = arg[i]
     out[i] = last
   return out
Beispiel #52
0
 def clip(arg,lower=None,upper=None):
   """Limit *arg* to the given bounds; a bound of None is not applied.

   With both bounds present a single ``clip`` call is used; with only one
   bound ``maximum`` (lower) or ``minimum`` (upper) is used.

   NOTE(review): the inner ``clip`` call is resolved through the enclosing
   scopes; presumably this def is a method and ``clip`` names the array
   library's function -- if this def were itself the module-level
   ``clip`` the call would recurse.  Confirm.
   """
   result = as_num_array(arg)
   if lower is None:
     if upper is not None:
       result = minimum(result,upper)
   elif upper is None:
     result = maximum(result,lower)
   else:
     result = clip(result,lower,upper)
   return result
Beispiel #53
0
 def fast(arg, first=False):
     """Return a 'Bool' mask of run boundaries in *arg*, computed with array ops.

     The mask starts as all ones; entries whose element equals the
     preceding element (*first* true) or the following element (*first*
     false) are subtracted out.

     NOTE(review): depends on in-place subtraction being supported for
     'Bool' arrays by the array library -- confirm.
     """
     arg = as_num_array(arg)
     keep = ones(len(arg), type='Bool')
     if not first:
         equals_next = arg[:-1] == arg[1:]
         keep[:-1] -= equals_next
         return keep
     equals_prev = arg[1:] == arg[:-1]
     keep[1:] -= equals_prev
     return keep
Beispiel #54
0
 def _prep_testdata(self, *args, **kwargs):
     """Convert the arguments to arrays ordered by the first one.

     A single argument is duplicated so that two vectors are returned.
     Unless ``dep_sorted`` is already set in *kwargs*, every vector is
     reordered by ``argsort`` of the first vector and ``dep_sorted`` is
     set to True.  Returns ``(vectors, kwargs)``.
     """
     # benchmark for inputs that are already vectors
     # simplification for tests: dep == indep
     vectors = [as_num_array(arg) for arg in args]
     if len(vectors) == 1:
         vectors.append(vectors[0].copy())
     if kwargs.get('dep_sorted'):
         return (vectors, kwargs)
     order = argsort(vectors[0])
     vectors = [take(vector, order) for vector in vectors]
     kwargs['dep_sorted'] = True
     return (vectors, kwargs)
Beispiel #55
0
 def loop3(arg):
   """Return 1.0 minus the sum of squared relative frequencies of the values in *arg*.

   Occurrences are tallied in a dictionary; each squared share is
   subtracted in the dictionary's iteration order.
   """
   arg = as_num_array(arg)
   total = float(len(arg))
   tally = {}
   for item in arg:
     tally[item] = tally.get(item,0) + 1
   result = 1.0
   for count in tally.itervalues():
     share = (count*1.0)/total
     result -= share * share
   return result
Beispiel #56
0
 def naive_iter(arg,out=None):
   """Replace zeros in *arg* by the last non-zero value seen, iterating with ``izip``.

   Zeros ahead of the first non-zero element stay 0.  The result is
   written into *out* (allocated via ``arg.new()`` when None), which is
   returned.
   """
   arg = as_num_array(arg)
   # `out is None` rather than `not out`: the truth value of an array
   # passed by the caller is ambiguous and must not be tested.
   if out is None:
     out = arg.new()
   last = 0
   for i,value in izip(it.count(),arg):
     if value != 0:
       out[i] = last = value
     else:
       out[i] = last
   return out