예제 #1
0
class HistoTuple(EquivUnary):
    '''Histogram returned as a sorted list of (value, count) tuples.

    Every implementation takes a sequence of values and returns one
    ``(value, count)`` pair per distinct value, ordered by value.

  '''
    name = 'histo_tuple'
    ranking = ('iter_groupby', 'dict1')

    tests = (
        Test([1, 2, 3, 4, 1, 2, 3, 1, 1, 1, 9]) == [(1, 5), (2, 2), (3, 2),
                                                    (4, 1), (9, 1)],
        Test([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) == [(1, 11)],
    )

    @staticmethod
    def iter_groupby(arg):
        '''Count the runs of equal values in the sorted input.'''
        arg = as_any_array(arg)
        # groupby() over sorted data yields each distinct key exactly once,
        # already in ascending order, so no second sort is needed.
        return [(k, len(list(g))) for k, g in groupby(sorted(arg))]

    @staticmethod
    def dict1(arg, out=None):
        '''Tally values in a dict, then return its items sorted by value.

        ``out`` is ignored; the result is always a newly built list.
        '''
        arg = as_any_array(arg)
        counts = {}
        for val in arg:
            counts[val] = counts.get(val, 0) + 1
        return sorted(counts.items())
예제 #2
0
class DictMapDefault(EquivUnary):
  '''apply dictionary mapping to each element in array, using default for unmatched

    Elements that are not keys of ``mapping`` are replaced by ``default``
    (0 unless overridden); they are NOT passed through unchanged.

    >>> func = DictMapDefault()
    >>> mapping = {1:100,2:200,3:300}
    >>> assert func([1,3,6,7,7,9],mapping=mapping).tolist() == [100,300,0,0,0,0]

  '''
  name = 'dict_map_default'
  ranking = ('naive_comp', 'naive_loop',)

  tests = (
    Test([1,3,6,7,7,9],mapping={1:100,2:200,3:300}) == [100,300,0,0,0,0],
    Test([1,3,6,7,'a',9],mapping={'1':100,'a':'aaa','3':300}) == ['100','300','0','0','aaa','0'],
  )

  @staticmethod
  def naive_loop(arg,mapping=None,default=0):
    '''Look up each element with an explicit loop.

    ``mapping=None`` is treated as an empty mapping, so every element
    maps to ``default``.
    '''
    if mapping is None:
      # avoid a shared mutable default argument
      mapping = {}
    arg = as_any_array(arg)
    out = []
    for value in arg:
      out.append(mapping.get(value,default))
    return as_any_array(out)

  @staticmethod
  def naive_comp(arg,mapping=None,default=0):
    '''Look up each element with a list comprehension.

    ``mapping=None`` is treated as an empty mapping, so every element
    maps to ``default``.
    '''
    if mapping is None:
      # avoid a shared mutable default argument
      mapping = {}
    arg = as_any_array(arg)
    out = [mapping.get(value,default) for value in arg]
    return as_any_array(out)
예제 #3
0
파일: window.py 프로젝트: soedjais/augustus
class WindowRange(EquivUnary):
    """Get value range (max - min) within sliding window on single data vector

    Window boundaries come from ``arg_sel_step_to_idx(arg, sel, step)``;
    each window is the slice ``arg[j + 1:k + 1]`` for boundary indexes
    ``j`` and ``k`` that lie ``step`` positions apart in that index list.

  """
    name = 'window_range'
    # Only naive1 is ranked.  naive2 does not skip empty windows the way
    # naive1 does, so it stays available but unranked.
    ranking = ('naive1', )
    bench_sizes = (1, 2, 3, 4, 5, 10, 20, 50, 100, 200, 300, 500, 1000, 5000)

    tests = (
        Test([0, 1, 2, 3]) == [0, 0, 0],
        Test([0, 1, 2, 3], step=2) == [0, 1, 1],
        Test(arange(20), sel=[0, 3, 5, 9, 20]) == [2, 1, 3, 9],
    )

    @staticmethod
    def naive1(arg, sel=None, step=1):
        """Range per window, skipping windows that turn out empty."""
        arg = as_num_array(arg)
        idx = arg_sel_step_to_idx(arg, sel, step)
        jj = idx[:-step]
        kk = idx[step:]
        # drop windows whose two boundaries coincide, and any empty slices
        segments = [arg[j + 1:k + 1] for j, k in izip(jj, kk) if j != k]
        segments = [seg for seg in segments if len(seg)]
        omin = [seg.min() for seg in segments]
        omax = [seg.max() for seg in segments]
        return as_num_array(omax) - omin

    @staticmethod
    def naive2(arg, sel=None, step=1):
        """Range per window with no filtering of empty windows.

        Declared as a staticmethod like every other implementation, so
        that it can be called without an instance being bound as ``arg``.
        """
        arg = as_num_array(arg)
        idx = arg_sel_step_to_idx(arg, sel, step)
        jj = idx[:-step]
        kk = idx[step:]
        omin = [arg[j + 1:k + 1].min() for j, k in izip(jj, kk)]
        omax = [arg[j + 1:k + 1].max() for j, k in izip(jj, kk)]
        return as_num_array(omax) - omin
예제 #4
0
파일: utils.py 프로젝트: soedjais/augustus
class RoundAll(EquivUnary):
  '''Round every element to ``ndigits`` decimal places, like the
  builtin ``round`` (for which there is no equivalent ufunc).

    >>> func = RoundAll()
    >>> assert func([12.3456,30,456.12,0],ndigits=2).tolist() == [12.35,30.,456.12,0.]

  '''
  name = 'round_all'
  ranking = ('roundint','naive')

  tests = (
    Test([12.3456,30,456.12,0],ndigits=-1) == [10,30,460,0],
    Test([12.3456,30,456.12,0],ndigits=0) == [12,30,456,0],
    Test([12.3456,30,456.12,0],ndigits=1) == [12.3,30,456.1,0],
    Test([12.3456,30,456.12,0],ndigits=2) == [12.35,30,456.12,0],
    Test([-12.3456,-30,-456.12,0],ndigits=2) == [-12.35,-30,-456.12,0],
  )

  @staticmethod
  def roundint(arg,ndigits=0):
    '''Vectorised rounding: scale, bias, cast to integer, unscale.'''
    values = as_num_array(arg)
    scale = 10**ndigits
    # bias by half a unit, carrying the sign of each value, before the cast
    half = where(values>=0,0.5,-0.5)
    biased = (values*scale)+half
    return biased.astype('Int') / float(scale)

  @staticmethod
  def naive(arg,ndigits=0):
    '''Call the builtin round() on each element in turn.'''
    values = as_num_array(arg)
    rounded = []
    for item in values:
      rounded.append(round(item,ndigits))
    return as_num_array(rounded)
예제 #5
0
class MidpointsFloat(EquivUnary):
  """Midpoints between consecutive unique values of a vector.

    Typical use: candidate cutpoints when building a tree.
    Note: the result has len(unique_values)-1 entries.
    Note: works for any numeric vector type; the midpoints are floats.
  """

  ranking = ('vector','naive')

  tests = (
    Test([1.0]) == [],
    Test([2.0]) == [],
    Test([1,3,5]) == [2.0,4.0],
    Test([1,3,3]) == [2.0],
    Test([1.2,1.3,1.5]) == [1.25,1.4],
  )

  @staticmethod
  def naive(arg):
    """Pair each sorted unique value with its successor; return a list."""
    ordered = sorted(set(arg))
    return [(lo+hi)/2.0 for lo,hi in zip(ordered,ordered[1:])]

  @staticmethod
  def vector(arg):
    """Array form: average the unique values with themselves shifted by one."""
    ordered = as_num_array(sorted(set(arg)))
    if len(ordered) > 1:
      return (ordered[1:]+ordered[:-1])/2.0
    # fewer than two unique values: no midpoints
    return []
예제 #6
0
파일: gini.py 프로젝트: soedjais/augustus
class Gini2(EquivBinary):
    """GINI on a pair of vectors

    The result is the length-weighted average of the single-vector GINI
    of each non-empty input.

  """
    itypes = ('i', 'i')

    name = 'gini2'
    ranking = ('simple2', 'simple1')

    tests = (
        Test([1, 1, 1], [1, 1, 1]) == 0.0,
        Test([1, 1, 1], [2, 2, 2]) == 0.0,
        Test([1, 1, 1], [1, 1, 2])**0.222222222222,
        Test([1, 2, 3], [1, 2, 3])**0.666666666667,
        Test([1, 2, 3], [1, 2, 3, 4, 5])**0.75,
    )

    @staticmethod
    def simple2(arg1, arg2):
        """Weighted GINI using the module-level ``gini`` callable."""
        vectors = []
        for vec in (arg1, arg2):
            if len(vec):
                vectors.append(as_num_array(vec))
        total = float(sum(len(vec) for vec in vectors))
        return sum((gini(vec) * len(vec) / total) for vec in vectors)

    @staticmethod
    def simple1(arg1, arg2):
        """Weighted GINI using a freshly constructed ``Gini`` instance."""
        gini = Gini()
        vectors = []
        for vec in (arg1, arg2):
            if len(vec):
                vectors.append(as_num_array(vec))
        total = float(sum(len(vec) for vec in vectors))
        return sum((gini(vec) * len(vec) / total) for vec in vectors)
예제 #7
0
class DeltaPrev(EquivUnary):
  '''difference from previous value

    The first output element is always 0; element i (i >= 1) is
    ``arg[i] - arg[i-1]``.  Each implementation accepts an optional
    preallocated ``out`` array, which is filled and returned.

    >>> func = DeltaPrev()
    >>> assert func([1,3,6,7,7,9]).tolist() == [0,2,3,1,0,2]

  '''
  name = 'delta_prev'
  ranking = ('smart','fast', 'naive_comp', 'naive_loop',)

  tests = (
    Test([1,3,6,7,7,9]) == [0,2,3,1,0,2],
    Test([0,3,0,7,0,0]) == [0,3,-3,7,-7,0],
    Test([0])           == [0],
  )

  @staticmethod
  def naive_loop(arg,out=None):
    '''Element-by-element loop.'''
    arg = as_num_array(arg)
    # test identity, not truth: `not out` on an array is ambiguous or wrong
    if out is None:
      out = arg.new()
    # always zero the first slot, even in a caller-supplied buffer
    if len(out):
      out[0] = 0
    for i in xrange(1,len(arg)):
      out[i] = arg[i] - arg[i-1]
    return out

  @staticmethod
  def naive_comp(arg,out=None):
    '''List comprehension assigned into the tail of ``out``.'''
    arg = as_num_array(arg)
    if out is None:
      out = arg.new()
    if len(out):
      out[0] = 0
    out[1:] = [arg[i]-arg[i-1] for i in xrange(1,len(arg))]
    return out

  @staticmethod
  def fast(arg,out=None):
    '''Vectorised subtract of the array against itself shifted by one.'''
    arg = as_num_array(arg)
    if out is None:
      out = arg.new()
    if len(out):
      out[0] = 0
    subtract(arg[1:],arg[:-1],out[1:])
    return out

  @classmethod
  def smart(cls,arg,out=None):
    '''Use the plain loop for short inputs (< 10 elements), else ``fast``.'''
    if len(arg) < 10:
      return cls.naive_loop(arg,out)
    return cls.fast(arg,out)

  @staticmethod
  def _check_result(out,arg,**kwargs):
    '''Assert that ``out`` holds the deltas of ``arg``; return True.'''
    assert out[0] == 0
    for o,a1,a2 in izip(out[1:],arg[1:],arg[:-1]):
      assert o == a1-a2
    return True
예제 #8
0
def run():
    """Build and invoke a Test for the 'amqp-0-9-1' and 'amqp' adapters."""
    loader = Loader()
    loader.load()
    # the same adapter catalog is reached under both names
    for key in ('amqp-0-9-1', 'amqp'):
        adapter = loader.catalog[key]
        Test(URL, adapter)()
예제 #9
0
파일: test_proton.py 프로젝트: jortel/gofer
def run():
    """Run the CRUD and no-exchange tests for 'amqp-1-0', then 'proton'."""
    loader = Loader()
    loader.load()
    for key in ('amqp-1-0', 'proton'):
        suite = Test(URL, loader.catalog[key])
        suite.test_crud()
        suite.test_no_exchange()
예제 #10
0
def run():
    """Invoke a Test for each of 'amqp-0-10', 'qpid' and 'qpid.messaging'."""
    loader = Loader()
    loader.load()
    # adapters are exercised in this fixed order
    for key in ('amqp-0-10', 'qpid', 'qpid.messaging'):
        adapter = loader.catalog[key]
        Test(URL, adapter)()
예제 #11
0
class FuncMap(EquivUnary):
  '''apply function to each element in array

    ``func`` is called once per element; when it is omitted the
    elements are passed through unchanged.

    >>> func = FuncMap()
    >>> mapping = {1:100,2:200,3:300,6:6,7:7,9:9}
    >>> assert func([1,3,6,7,7,9],func=mapping.get).tolist() == [100,300,6,7,7,9]

  '''
  name = 'func_map'
  ranking = ('naive_comp', 'naive_loop',)

  tests = (
    Test([1,3,6,7,7,9],func={1:100,2:200,3:300,6:6,7:7,9:9}.get) == [100,300,6,7,7,9],
  )

  @staticmethod
  def _default_func(arg):
    '''Identity function used when the caller supplies no ``func``.'''
    return arg

  @staticmethod
  def naive_loop(arg,func=None):
    '''Apply ``func`` with an explicit loop.'''
    # explicit None test: a callable object may itself be falsy
    if func is None:
      func = FuncMap._default_func
    arg = as_any_array(arg)
    out = []
    for value in arg:
      out.append(func(value))
    return as_any_array(out)

  @staticmethod
  def naive_comp(arg,func=None):
    '''Apply ``func`` with a list comprehension.'''
    # explicit None test: a callable object may itself be falsy
    if func is None:
      func = FuncMap._default_func
    arg = as_any_array(arg)
    out = [func(value) for value in arg]
    return as_any_array(out)
예제 #12
0
class GaussianPdf(EquivUnary):
  '''Gaussian probability distribution function

    >>> func = GaussianPdf().gsl
    >>> assert allclose(func([1.2,0.1,0.5],variance=1.0),[0.19418605,0.39695255,0.35206533])


  '''
  name = 'gaussian_pdf'
  ranking = ('gsl',)

  tests = (
    Test([1.2,0.1,0.5],variance=1.0) ** [0.19418605,0.39695255,0.35206533],
  )

  @staticmethod
  def gsl(arg,mean=0.0,variance=0.0,out=None):
    '''Evaluate the density at each element of ``arg``.

    arg      -- values at which to evaluate the density
    mean     -- centre of the distribution; subtracted from ``arg``
                before evaluation
    variance -- passed unchanged as the second argument of
                ``gaussian_pdf``
                NOTE(review): if that helper wraps GSL's
                gsl_ran_gaussian_pdf, its second argument is sigma
                (standard deviation), not variance -- confirm.
    out      -- optional preallocated result array; a new one is
                created from ``arg`` when omitted

    Returns ``out``.
    '''
    arg = as_num_array(arg)
    # test identity, not truth: `not out` on an array is ambiguous or wrong
    if out is None:
      out = arg.new()
    if mean != 0.0:
      # gaussian_pdf takes no mean, so shift the data instead.  The
      # original had these branches inverted and ignored a nonzero mean.
      arg = arg-mean
    out[:] = gaussian_pdf(arg,variance)
    return out
예제 #13
0
class UniqueMask(EquivUnary):
    '''given sorted data, return mask selecting unique values.
    Typically used to reduce data with identical timestamps.
    By default, last value from a run of equal values is taken;
    pass ``first=True`` to keep the first value of each run instead.
    Empty input gives an empty mask.

  '''
    name = 'unique_mask'
    ranking = (
        'fast',
        'naive_loop',
    )

    tests = (
        Test([0]) == [1],
        Test([0], first=True) == [1],
        Test([1, 2, 3, 7, 8, 9]) == [1, 1, 1, 1, 1, 1],
        Test([1, 2, 3, 7, 8, 9], first=True) == [1, 1, 1, 1, 1, 1],
        Test([1, 3, 3, 3, 4, 5]) == [1, 0, 0, 1, 1, 1],
        Test([1, 3, 3, 3, 4, 5], first=True) == [1, 1, 0, 0, 1, 1],
        Test([1, 1, 3, 3, 5, 5], first=False) == [0, 1, 0, 1, 0, 1],
        Test([1, 1, 3, 3, 5, 5], first=True) == [1, 0, 1, 0, 1, 0],
    )

    @staticmethod
    def naive_loop(arg, first=False):
        '''Build the mask one element at a time by comparing neighbours.'''
        arg = as_num_array(arg)
        out = []
        # Only add the unconditional leading/trailing 1 when there is data;
        # otherwise an empty input would yield a one-element mask and
        # disagree with `fast`.
        if len(arg):
            if first:
                # the first element always starts a run
                out.append(1)
                for i in xrange(len(arg) - 1):
                    if arg[i] != arg[i + 1]:
                        out.append(1)
                    else:
                        out.append(0)
            else:
                for i in xrange(1, len(arg)):
                    if arg[i] != arg[i - 1]:
                        out.append(1)
                    else:
                        out.append(0)
                # the last element always ends a run
                out.append(1)
        return as_num_array(out, type='Bool')

    @staticmethod
    def fast(arg, first=False):
        '''Vectorised form: start from all ones and clear the duplicates.'''
        arg = as_num_array(arg)
        out = ones(len(arg), type='Bool')
        if first:
            # clear every element equal to its predecessor
            reject = arg[1:] == arg[:-1]
            out[1:] -= reject
        else:
            # clear every element equal to its successor
            reject = arg[:-1] == arg[1:]
            out[:-1] -= reject
        return out
예제 #14
0
class MidpointsInteger(EquivUnary):
  """Integer midpoints between consecutive unique values of a vector.

    Typical use: candidate cutpoints when building a tree.
    Note: the result has len(unique_values)-1 entries.
    Note: intended for integer vectors only; the midpoints are integers,
          rounded up.
  """

  ranking = ('vector','naive')

  tests = (
    Test([1]) == [],
    Test([2]) == [],
    Test([1,1,2]) == [2],
    Test([2,2,3]) == [3],
    Test([1,3,5]) == [2,4],
    Test([-5,-1,0,1,8,2,2]) == [-3,0,1,2,5],
  )

  @staticmethod
  def naive(arg):
    """Pair each sorted unique value with its successor; return a list."""
    ordered = sorted(set(arg))
    # the +1 before floor division rounds the midpoint up
    return [((lo+hi)+1)//2 for lo,hi in zip(ordered,ordered[1:])]

  @staticmethod
  def vector(arg):
    """Array form: combine the unique values with themselves shifted by one."""
    ordered = as_num_array(sorted(set(arg)))
    if len(ordered) > 1:
      # assuming integer values and '<' op, the +1 causes round-up
      return (ordered[1:]+ordered[:-1]+1)//2
    return []
예제 #15
0
파일: utils.py 프로젝트: dlocke/unitable
class RangeCap(EquivUnary):
    '''Limit values to the closed range [lower, upper].

    A bound given as None is not applied.

    >>> func = RangeCap()
    >>> assert func([1,3,-9,-3,6,7,9],lower=-5,upper=6).tolist() == [1,3,-5,-3,6,6,6]

  '''
    name = 'range_cap'
    ranking = ('naive', 'clip')

    tests = (
        Test([1, 3, -9, -3, 6, 7, 9, 0], lower=None,
             upper=None) == [1, 3, -9, -3, 6, 7, 9, 0],
        Test([1, 3, -9, -3, 6, 7, 9, 0], lower=5,
             upper=None) == [5, 5, 5, 5, 6, 7, 9, 5],
        Test([1, 3, -9, -3, 6, 7, 9, 0], lower=-5,
             upper=6) == [1, 3, -5, -3, 6, 6, 6, 0],
    )

    @staticmethod
    def clip(arg, lower=None, upper=None):
        '''Use the module-level clip() when both bounds are given.'''
        data = as_num_array(arg)
        if lower is None or upper is None:
            # at most one bound is present: apply whichever it is
            if lower is not None:
                data = maximum(data, lower)
            if upper is not None:
                data = minimum(data, upper)
            return data
        return clip(data, lower, upper)

    @staticmethod
    def naive(arg, lower=None, upper=None):
        '''Apply the lower bound, then the upper bound, independently.'''
        data = as_num_array(arg)
        if lower is not None:
            data = maximum(data, lower)
        if upper is not None:
            data = minimum(data, upper)
        return data
예제 #16
0
class Discretize(EquivUnary):
  """Map each value to its segment in a sorted boundary table.

  A value lands in segment ``i`` when ``boundaries[i]`` is the first
  boundary it does not exceed, or in segment ``len(boundaries)`` when it
  exceeds them all.  With ``values`` given, the segment number is
  replaced by ``values[segment]``.

  >>> func = Discretize()
  >>> b = as_num_array([1,10,50])
  >>> v = as_num_array([0,5,35,100])
  >>> data = as_num_array([100,75,50,25,2,1,0])

  >>> print func(data,boundaries=b)
  [3 3 2 2 1 0 0]

  >>> print func(data,boundaries=b,values=v)
  [100 100  35  35   5   0   0]

  """

  ranking = ('fast','loop')

  tests = (
    Test([0],boundaries=[0]) == [0],
    Test([1,2,3,4,5],boundaries=[3]) == [0,0,0,1,1],
    Test([1,2,1.3,1.4,1.33],boundaries=[1.33]) == [0,1,0,1,0],
    Test([100,75,50,25,2,1,0],boundaries=[1,10,50]) == [3,3,2,2,1,0,0],
    Test([100,75,50,25,2,1,0],boundaries=[1,10,50],values=[0,5,35,100]) == [100,100,35,35,5,0,0],
    Test([100,-25,50,75,2,0,1],boundaries=[1,10,50],values=[0,5,35,100]) == [100,0,35,100,5,0,0],
    Test([1,2,3,4,5],boundaries=[3,2,1]) == [0,0,0,3,3], # nonsense
  )

  @staticmethod
  def fast(arg,boundaries=[0,100,1000],values=None):
    """Locate segments with searchsorted; map through ``values`` if given."""
    assert len(boundaries), "at least one boundary is required"
    if values is not None:
      assert len(boundaries)+1 == len(values), "len(values) must be len(boundaries)+1, (%s,%s)" % (len(values),len(boundaries))
    segments = searchsorted(boundaries,arg)
    if values is not None:
      return as_num_array(values).take(segments)
    return segments

  @staticmethod
  def loop(arg,boundaries=[0,100,1000],values=None):
    """Locate segments by scanning the boundaries for every value."""
    assert len(boundaries), "at least one boundary is required"
    if values is not None:
      assert len(boundaries)+1 == len(values), "len(values) must be len(boundaries)+1, (%s,%s)" % (len(values),len(boundaries))
    segments = []
    for item in arg:
      # default: past every boundary
      slot = len(boundaries)
      for pos,bound in enumerate(boundaries):
        if item <= bound:
          slot = pos
          break
      segments.append(slot)
    if values is not None:
      return [values[slot] for slot in segments]
    return segments
예제 #17
0
class UniformPdf(EquivUnary):
    '''Uniform probability distribution function

    >>> func = UniformPdf().gsl
    >>> assert allclose(func([1.2,0.1,0.5],b=1.0),[0.0,1.0,1.0])


  '''
    name = 'uniform_pdf'
    ranking = ('gsl', )

    tests = (Test([1.2, 0.1, 0.5], b=1.0)**[0.0, 1.0, 1.0], )

    @staticmethod
    def gsl(arg, a=0.0, b=0.0, out=None):
        '''Evaluate ``flat_pdf(arg, a, b)`` into ``out`` and return it.

        arg  -- values at which to evaluate the density
        a, b -- passed unchanged to ``flat_pdf``
                (presumably the lower and upper ends of the interval)
        out  -- optional preallocated result array; a new one is created
                from ``arg`` when omitted
        '''
        arg = as_num_array(arg)
        # test identity, not truth: `not out` on an array is ambiguous or
        # would discard a caller-supplied all-zero buffer
        if out is None:
            out = arg.new()
        out[:] = flat_pdf(arg, a, b)
        return out
예제 #18
0
class ExponentialPdf(EquivUnary):
    '''Exponential probability distribution function

    >>> func = ExponentialPdf().gsl
    >>> assert allclose(func([1.2,0.1,0.5],mu=1.0),[0.30119421,0.90483742,0.60653066])


  '''
    name = 'exponential_pdf'
    ranking = ('gsl', )

    tests = (Test([1.2, 0.1, 0.5],
                  mu=1.0)**[0.30119421, 0.90483742, 0.60653066], )

    @staticmethod
    def gsl(arg, mu=0.0, out=None):
        '''Evaluate ``exponential_pdf(arg, mu)`` into ``out`` and return it.

        arg -- values at which to evaluate the density
        mu  -- passed unchanged to ``exponential_pdf``
               (presumably the mean of the distribution)
        out -- optional preallocated result array; a new one is created
               from ``arg`` when omitted
        '''
        arg = as_num_array(arg)
        # test identity, not truth: `not out` on an array is ambiguous or
        # would discard a caller-supplied all-zero buffer
        if out is None:
            out = arg.new()
        out[:] = exponential_pdf(arg, mu)
        return out
예제 #19
0
class CusumReset(EquivUnary):
    '''CUSUM with reset algorithm

    A running sum that is never allowed to drop below ``reset_value``:
    each output is ``max(reset_value, previous_output + value)``.

    >>> func = CusumReset().iterfunc
    >>> assert func([1,-3,6,7,-7,-9]).tolist() == [1,0,6,13,6,0]

  '''
    name = 'cusum_reset'
    ranking = ('iterfunc', )
    #ranking = ('iterfunc','iterloop')

    tests = (Test([1, -3, 6, 7, -7, -9]) == [1, 0, 6, 13, 6, 0], )

    @staticmethod
    def iterfunc(arg, reset_value=0.0, out=None):
        '''Produce the running values from a generator, then bulk-assign.

        ``out`` is an optional preallocated result array; a new one is
        created from ``arg`` when omitted.  Returns ``out``.
        '''
        def gen_cusum(data, reset_value=0.0):
            # no obvious way to vectorize this
            out = 0.0
            for value in data:
                out = max(reset_value, out + value)
                yield out

        arg = as_num_array(arg)
        if out is None:
            out = arg.new()
        out[:] = list(gen_cusum(arg, reset_value))
        return out

    @staticmethod
    def iterloop(arg, reset_value=0.0, out=None):
        '''Same computation as ``iterfunc``, storing one element at a time.

        ``out`` is an optional preallocated result array; a new one is
        created from ``arg`` when omitted.  Returns ``out``.
        '''
        arg = as_num_array(arg)
        # test identity, not truth: `not out` on an array is ambiguous
        if out is None:
            out = arg.new()
        last = 0.0
        for i, value in enumerate(arg):
            # Carry the running total forward in a Python variable.  The
            # original never updated `last`, so every output was merely
            # max(reset_value, value).
            last = max(reset_value, last + value)
            out[i] = last
        return out
예제 #20
0
class CarryForward(EquivUnary):
  '''fill empty values with previous non-empty value

    A value is "empty" when it equals 0.  Leading empties, which have
    nothing before them to carry, stay 0.

    >>> func = CarryForward().naive_loop
    >>> assert func([0,3,0,0,4,0]).tolist() == [0,3,3,3,4,4]

  '''
  name = 'carry_forward'
  ranking = ('naive_loop', 'naive_iter', 'array_idx')

  tests = (
    Test([0,3,0,0,4,0]) == [0,3,3,3,4,4],
    Test([0,0,0,0,0,0]) == [0,0,0,0,0,0],
    Test([0,0,0,0,0,5]) == [0,0,0,0,0,5],
    Test([9,0,0,0,0,0]) == [9,9,9,9,9,9],
  )

  @staticmethod
  def array_idx(arg,out=None):
    '''Fill whole slices between consecutive nonzero positions.'''
    arg = as_num_array(arg)
    # test identity, not truth: `not out` on an array is ambiguous or wrong
    if out is None:
      out = arg.new()
    idx = arg.nonzero()[0]
    if not len(idx):
      # nothing to carry: the whole result is empty (0)
      out[:] = 0
      return out
    # everything before the first nonzero value stays 0
    out[:idx[0]] = 0
    # each nonzero value fills up to (not including) the next one
    for start,stop in izip(idx[:-1],idx[1:]):
      out[start:stop] = arg[start]
    # the final nonzero value fills through the end
    last = idx[-1]
    out[last:] = arg[last]
    return out

  @staticmethod
  def naive_iter(arg,out=None):
    '''Iterate over (index, value) pairs, remembering the last nonzero.'''
    arg = as_num_array(arg)
    if out is None:
      out = arg.new()
    last = 0
    for i,value in izip(it.count(),arg):
      if value != 0:
        out[i] = last = value
      else:
        out[i] = last
    return out

  @staticmethod
  def naive_loop(arg,out=None):
    '''Index loop, remembering the last nonzero value seen.'''
    arg = as_num_array(arg)
    if out is None:
      out = arg.new()
    last = 0
    for i in xrange(len(arg)):
      if arg[i] != 0:
        last = arg[i]
      out[i] = last
    return out
예제 #21
0
def run():
    """Print the target URL, then run the CRUD and no-exchange tests."""
    # parenthesised form prints the same text under the print statement
    print(URL)
    suite = Test(URL)
    suite.test_crud()
    suite.test_no_exchange()
예제 #22
0
class PartitionIntegerGini(EquivBinary):
    """Given a pair of dependent+independent integer vectors,
      consider all possible cutpoints in the independent vector, and
      return list of Gini scores for resulting splits in dependent.

      Every implementation takes ``(dep, indep, cutpoints=None, ...)``.
      When ``cutpoints`` is None it is computed as
      ``midpoints_integer(indep)``.  For each cutpoint the rows are split
      into ``indep < cut`` and ``indep >= cut`` and one score is produced,
      so the result has one entry per cutpoint.

  """
    itypes = ('i', 'i')

    # implementation names, as consumed by the base class
    ranking = ('isort', 'deltacnt2', 'deltacnt1', 'deltacnt0', 'presort',
               'idxsel', 'masksel', 'naive')
    bench_sizes = (
        1,
        2,
        3,
        4,
        5,
        10,
        20,
        50,
        100,
        200,
        300,
        500,
        1000,
        5000,
        10000,
        # the following sizes take too long to include in the default benchmarking run
        #50000,
        #100000,
        #500000,
        #1000000,
    )

    # `==` states an exact expectation; `**` is presumably an approximate
    # comparison defined by Test -- confirm.  Single-argument cases rely
    # on _prep_testdata duplicating the vector (dep == indep).
    tests = (
        Test([1, 2]) == [0.0],
        Test([1, 2, 3])**([0.3333333] * 2),
        Test([1, 2, 3, 4])**([0.5] * 3),
        Test([1, 2, 3, 4, 5])**([0.5999999] * 4),
        Test([0, 1, 2, 3, 4])**([0.5999999] * 4),
        Test([1, 2, 3, 2, 1])**[0.2666666, 0.4],
        Test([3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
              1])**[0.071428571428571425, 0.13186813186813176],
        Test([1, 2, 3, 4], [6, 7, 6, 8])**[0.5, 0.5],
        Test([1, 2, 3, 4, 5, 6], [6, 7, 6, 8, 9, 6])**([0.6666666] * 3),
        Test([1, 2, 1, 2, 1, 2], [6, 7, 8, 8, 9, 9])**[0.3999999, 0.5, 0.5],
        Test([1, 2, 1, 2, 1, 2],
             [4, 5, 6, 7, 8, 9])**[0.3999999, 0.5, 0.4444444, 0.5, 0.3999999],
        Test([1, 2, 1, 2, 1, 2], [4, 5, 6, 7, 8, 9],
             cutpoints=[7, 8])**[0.4444444, 0.5],
    )

    def _prep_testdata(self, *args, **kwargs):
        """Convert test inputs to arrays and pre-sort them by the first one.

        Returns ``(vectors, kwargs)``.  A single input vector is
        duplicated so that it serves as both dep and indep.  Unless
        ``dep_sorted`` is already truthy in ``kwargs``, every vector is
        reordered by ``argsort`` of the first vector and
        ``kwargs['dep_sorted']`` is set to True.
        """
        # benchmark for inputs that are already vectors
        # simplification for tests: dep == indep
        out = [as_num_array(arg) for arg in args]
        if len(out) == 1:
            out.append(out[0].copy())
        if not kwargs.get('dep_sorted'):
            idx = argsort(out[0])
            out = [take(vec, idx) for vec in out]
            kwargs['dep_sorted'] = True
        return (out, kwargs)

    @staticmethod
    def smart(dep, indep, cutpoints=None, **kwargs):
        """Dispatch on size: ``naive`` below 100 rows, otherwise ``isort``.

        Not listed in ``ranking``.
        """
        # not needed unless high penalty for small datasets
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        if len(dep) < 100:
            return PartitionIntegerGini.naive(dep,
                                              indep,
                                              cutpoints=cutpoints,
                                              **kwargs)
        return PartitionIntegerGini.isort(dep,
                                          indep,
                                          cutpoints=cutpoints,
                                          **kwargs)

    @staticmethod
    def isort(dep, indep, cutpoints=None, **kwargs):
        """Score every cutpoint from one pass over data sorted by ``indep``.

        Returns a list with one ``gini2_counts`` result per cutpoint, or
        ``[]`` when there are no cutpoints.  Extra keyword arguments are
        accepted and ignored.
        """
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        if not len(cutpoints):
            return []
        # sort both vectors by *indep*
        idx = argsort(indep)
        dep = take(dep, idx)
        indep = take(indep, idx)
        #print len(dep),len(indep),len(cutpoints)
        # cutidx holds row offsets into the sorted data: consecutive
        # entries (cutidx[n], cutidx[n+1]) delimit the rows that newly fall
        # below cutpoint n.  len(cutidx) - 2 is the cutpoint being filled.
        cutidx = [0, 0]
        for ival, isub in it.groupby(indep):
            ilen = len(list(isub))
            if ival < cutpoints[len(cutidx) - 2]:
                # still below the current cutpoint: extend its segment
                cutidx[-1] += ilen
            else:
                # at or above the current cutpoint: stop if it was the last
                # one, otherwise open the segment for the next cutpoint
                if len(cutidx) > len(cutpoints):
                    break
                cutidx.append(cutidx[-1] + ilen)
        assert len(cutidx) - 1 == len(
            cutpoints), '%s != %s' % (len(cutidx) - 1, len(cutpoints))
        out = []
        # cnt1 / cnt2: per-dep-value row counts below / at-or-above the cut.
        # All rows start on the upper side.
        cnt2 = dict(histo_tuple(dep))
        cnt1 = dict.fromkeys(cnt2.keys(), 0)
        for i1, i2 in izip(cutidx[:-1], cutidx[1:]):
            # update the counts from the last cut
            for d, cnt in histo_tuple(dep[i1:i2]):
                cnt1[d] += cnt
                cnt2[d] -= cnt
            # calculate results based on counts
            a1 = as_num_array([val for val in cnt1.itervalues() if val != 0])
            a2 = as_num_array([val for val in cnt2.itervalues() if val != 0])
            out.append(gini2_counts(a1, a2))
        assert len(out) == len(cutpoints), '%s != %s' % (len(out),
                                                         len(cutpoints))
        return out

    @staticmethod
    def deltacnt2(dep, indep, cutpoints=None, **kwargs):
        """Incremental counts, updated via a histogram of the new rows.

        For each cutpoint only the rows not already below an earlier
        cutpoint are examined; their ``histo_tuple`` moves counts from the
        upper side to the lower side.  Extra keyword arguments are ignored.
        """
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        out = []
        # dictionary of counts in each dataset
        cnt2 = dict(histo_tuple(dep))
        cnt1 = dict.fromkeys(cnt2.keys(), 0)
        # elementwise self-inequality: an all-False mask shaped like indep
        lastmask = (indep != indep)
        for cut in cutpoints:
            mask = indep < cut
            # examine only the new values from the last cut
            maskdelta = mask & ~lastmask
            lastmask |= mask
            idxdelta = nonzero(maskdelta)[0]
            # update the counts from the last cut
            for d, cnt in histo_tuple(dep[idxdelta]):
                cnt1[d] += cnt
                cnt2[d] -= cnt
            # calculate results based on counts
            a1 = as_num_array([val for val in cnt1.itervalues() if val != 0])
            a2 = as_num_array([val for val in cnt2.itervalues() if val != 0])
            out.append(gini2_counts(a1, a2))
        return out

    @staticmethod
    def deltacnt1(dep, indep, cutpoints=None, **kwargs):
        """Incremental counts, updated one new row at a time.

        Same scheme as ``deltacnt2`` but without the intermediate
        histogram.  Extra keyword arguments are ignored.
        """
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        out = []
        # dictionary of counts in each dataset
        cnt2 = dict(histo_tuple(dep))
        cnt1 = dict.fromkeys(cnt2.keys(), 0)
        # elementwise self-inequality: an all-False mask shaped like indep
        lastmask = (indep != indep)
        for cut in cutpoints:
            mask = indep < cut
            # examine only the new values from the last cut
            maskdelta = mask & ~lastmask
            lastmask |= mask
            idxdelta = nonzero(maskdelta)[0]
            # update the counts from the last cut
            for d in dep[idxdelta]:
                cnt1[d] += 1
                cnt2[d] -= 1
            # calculate results based on counts
            a1 = as_num_array([val for val in cnt1.itervalues() if val != 0])
            a2 = as_num_array([val for val in cnt2.itervalues() if val != 0])
            out.append(gini2_counts(a1, a2))
        return out

    @staticmethod
    def deltacnt0(dep, indep, cutpoints=None, **kwargs):
        """Incremental counts kept in arrays rather than dictionaries.

        ``dep_keys`` maps each distinct dep value to its slot in the count
        vectors.  Extra keyword arguments are ignored.
        """
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        out = []
        # get vector of counts in each dataset
        tmp = histo_tuple(dep)
        dep_keys = dict((x[0], i) for i, x in enumerate(tmp))
        cnt2 = as_num_array([x[1] for x in tmp])
        # a zero vector shaped like cnt2
        cnt1 = cnt2 - cnt2
        # elementwise self-inequality: an all-False mask shaped like indep
        lastmask = (indep != indep)
        for cut in cutpoints:
            mask = indep < cut
            # rows below this cut that were not below any earlier cut
            maskdelta = mask & ~lastmask
            lastmask |= mask
            for d in dep[maskdelta]:
                key = dep_keys[d]
                cnt1[key] += 1
                cnt2[key] -= 1
            # only nonzero counts are passed on
            a1 = cnt1[cnt1 != 0]
            a2 = cnt2[cnt2 != 0]
            out.append(gini2_counts(a1, a2))
        return out

    @staticmethod
    def presort(dep, indep, cutpoints=None, dep_sorted=False):
        """Sort by ``dep`` once, then score each split with ``gini2_presorted``.

        Pass ``dep_sorted=True`` when the inputs are already ordered by
        ``dep`` to skip the sort.  Unlike the other implementations this
        one accepts no arbitrary extra keyword arguments.
        """
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if not dep_sorted:
            idx = argsort(dep)
            dep = take(dep, idx)
            indep = take(indep, idx)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        out = []
        for cut in cutpoints:
            mask = indep < cut
            i1 = nonzero(mask)[0]
            i2 = nonzero(~mask)[0]
            a1 = dep[i1]
            a2 = dep[i2]
            out.append(gini2_presorted(a1, a2))
        return out

    @staticmethod
    def idxsel(dep, indep, cutpoints=None, **kwargs):
        """Split by index arrays from ``nonzero``; score with ``gini2``.

        Extra keyword arguments are ignored.
        """
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        out = []
        for cut in cutpoints:
            mask = indep < cut
            i1 = nonzero(mask)[0]
            i2 = nonzero(~mask)[0]
            a1 = dep[i1]
            a2 = dep[i2]
            out.append(gini2(a1, a2))
        return out

    @staticmethod
    def masksel(dep, indep, cutpoints=None, **kwargs):
        """Split by boolean-mask indexing; score with ``gini2``.

        Extra keyword arguments are ignored.
        """
        dep = as_num_array(dep)
        indep = as_num_array(indep)
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        out = []
        for cut in cutpoints:
            mask = indep < cut
            a1 = dep[mask]
            a2 = dep[~mask]
            out.append(gini2(a1, a2))
        return out

    @staticmethod
    def naive(dep, indep, cutpoints=None, **kwargs):
        """Reference version: plain list comprehensions, no array conversion.

        Extra keyword arguments are ignored.
        """
        if cutpoints is None:
            cutpoints = midpoints_integer(indep)
        out = []
        for cut in cutpoints:
            a1 = [d for d, i in izip(dep, indep) if i < cut]
            a2 = [d for d, i in izip(dep, indep) if i >= cut]
            out.append(gini2(a1, a2))
        return out
예제 #23
0
def run():
    """Run the CRUD and no-exchange tests for 'amqp-1-0', then 'proton'."""
    loader = Loader()
    loader.load()
    for key in ('amqp-1-0', 'proton'):
        suite = Test(URL, loader.catalog[key])
        suite.test_crud()
        suite.test_no_exchange()
예제 #24
0
파일: window.py 프로젝트: soedjais/augustus
class WindowApply(EquivUnary):
    """Apply a function to each sliding window of a single data vector.

    Window boundaries come from ``arg_sel_step_to_idx(arg, sel, step)``;
    each window is the slice ``arg[j + 1:k + 1]`` for boundary indexes
    ``j`` and ``k`` that lie ``step`` positions apart in that index list.

  """
    name = 'window_apply'
    ranking = ('naive1', 'naive2')
    bench_sizes = (1, 2, 3, 4, 5, 10, 20, 50, 100, 200, 300, 500, 1000, 5000)

    tests = (
        Test(arange(20),sel=[3,5,9,19]) == [6,9,30,145],

        Test(arange(10),sel=zeros(10,type='Bool'),step=1) == [],
        Test(arange(10),sel=ones(10,type='Bool'),step=1)
        == [0,1,2,3,4,5,6,7,8,9],
        Test(arange(10),sel=(arange(10)%2)==0,step=1) == [0,3,7,11,15],
        Test(arange(10),sel=(arange(10)%2)!=0,step=1) == [1,5,9,13,17],

        Test(arange(10),step=2) == [1,3,5,7,9,11,13,15,17],
        Test(arange(1,11),step=2) == [2,5,7,9,11,13,15,17,19],
        Test(arange(10),step=3) == [3,6,9,12,15,18,21,24],
        Test(arange(1,11),step=3) == [5,9,12,15,18,21,24,27],
        Test(arange(2,12),step=3) == [7,12,15,18,21,24,27,30],
        Test([0,1,2,3],func=sum) == [0,1,2,3],
        Test([0,1,2,3],step=2) == [1,3,5],
        #Test([0,1,2,3],func=minimum)	== [0,1,2,3],
    )

    def _prep_testdata_broken(self, *args, **kwargs):
        """Convert inputs to arrays; with no kwargs, select the zero entries."""
        vectors = []
        for item in args:
            vectors.append(as_num_array(item))
        if not kwargs:
            # automatic test cases
            kwargs['sel'] = as_num_array(args[0]) == 0
        return (vectors, kwargs)

    @staticmethod
    def naive1(arg, sel=None, step=1, func=sum):
        """Walk the boundary list by position and apply ``func`` per window."""
        data = as_num_array(arg)
        bounds = arg_sel_step_to_idx(data, sel, step)
        results = [func(data[bounds[pos] + 1:bounds[pos + step] + 1])
                   for pos in xrange(len(bounds) - step)]
        return as_num_array(results)

    @staticmethod
    def naive2(arg, sel=None, step=1, func=sum):
        """Pair start and stop boundaries via slicing; apply ``func`` per window."""
        data = as_num_array(arg)
        bounds = arg_sel_step_to_idx(data, sel, step)
        results = [func(data[lo + 1:hi + 1])
                   for lo, hi in izip(bounds[:-step], bounds[step:])]
        return as_num_array(results)
예제 #25
0
파일: gini.py 프로젝트: soedjais/augustus
class Gini(EquivUnary):
    """Gini impurity of a single vector of discrete values.

    Every implementation below computes ``1 - sum(f * f)`` where ``f`` is
    the fraction of elements sharing one distinct value.  The variants
    differ only in how the per-value counts are collected.

    """
    itypes = 'i'

    name = 'gini'
    ranking = ('groupby2', 'smart', 'loop3', 'groupby3', 'groupby1', 'loop2',
               'loop1')

    tests = (
        Test([0, 0, 0]) == 0.0,
        Test([1, 1, 1]) == 0.0,
        Test([0, 0, 1])**0.444444444444,
        Test([0, 1, 1])**0.444444444444,
        Test([1, 1, 2])**0.444444444444,
        Test([1, 2, 2])**0.444444444444,
        Test([1, 2, 3])**0.666666666667,
        Test([1, 2, 3, 4]) == 0.75,
        Test([4, 3, 2, 1]) == 0.75,
        Test([1, 2, 3, 2, 3, 3])**0.611111111111,
        Test([2, 1, 3, 200, 3000, 30000])**0.833333333333,
        Test(range(100))**0.99,
        Test(range(1000))**0.999,
    )

    @staticmethod
    def smart(arg):
        """Dispatch on length: ``loop3`` below 50 elements, else ``groupby2``."""
        values = as_num_array(arg)
        chosen = Gini.loop3 if len(values) < 50 else Gini.groupby2
        return chosen(values)

    @staticmethod
    def groupby3(arg):
        """Count runs of the array-sorted input and square them as an array."""
        values = as_num_array(arg)
        total = float(len(values))
        run_sizes = [len(list(run)) for _, run in groupby(sort(values))]
        fractions = as_num_array(run_sizes) / total
        # square in place, then subtract the summed squares from one
        fractions *= fractions
        return 1.0 - fractions.sum()

    @staticmethod
    def groupby2(arg):
        """Count runs of ``sorted(arg)`` and subtract each squared fraction."""
        values = as_num_array(arg)
        total = float(len(values))
        fractions = [
            len(list(run)) / total for _, run in groupby(sorted(values))
        ]
        impurity = 1.0
        for fraction in fractions:
            impurity -= fraction * fraction
        return impurity

    @staticmethod
    def groupby1(arg):
        """Build a (value, count) histogram via ``groupby``, then accumulate."""
        values = as_num_array(arg)
        histogram = [
            (key, len(list(run))) for key, run in groupby(sorted(values))
        ]
        total = float(len(values))
        impurity = 1.0
        for _, count in histogram:
            fraction = count / total
            impurity -= fraction * fraction
        return impurity

    @staticmethod
    def loop3(arg):
        """Tally with ``dict.setdefault`` and iterate counts via ``itervalues``."""
        values = as_num_array(arg)
        total = float(len(values))
        tally = {}
        for item in values:
            tally[item] = tally.setdefault(item, 0) + 1
        impurity = 1.0
        for count in tally.itervalues():
            fraction = count * 1.0 / total
            impurity -= fraction * fraction
        return impurity

    @staticmethod
    def loop2(arg):
        """Tally with ``dict.get`` and iterate pairs via ``iteritems``."""
        values = as_num_array(arg)
        total = float(len(values))
        tally = {}
        for item in values:
            tally[item] = tally.get(item, 0) + 1
        impurity = 1.0
        for _, count in tally.iteritems():
            fraction = count * 1.0 / total
            impurity -= fraction * fraction
        return impurity

    @staticmethod
    def loop1(arg):
        """Tally with ``dict.get`` and iterate pairs via ``items``."""
        values = as_num_array(arg)
        total = float(len(values))
        tally = {}
        for item in values:
            tally[item] = tally.get(item, 0) + 1
        impurity = 1.0
        for _, count in tally.items():
            fraction = count * 1.0 / total
            impurity -= fraction * fraction
        return impurity
예제 #26
0
파일: gini.py 프로젝트: soedjais/augustus
class GiniGain(EquivBinary):
    """Gini gain obtained by splitting data into two given parts.

    Note: this is unlikely to be the most efficient approach for building
    trees, because the gini() of the combined node would be recomputed for
    every candidate split.  An application should compute that value once
    and then loop over the possible splits.

    The method exists mainly as documentation and as an extra test that
    exercises the Gini implementations in combination.

    """
    itypes = ('i', 'i')

    name = 'gini_gain'
    ranking = ('simple1', )

    tests = (
        Test([], [1, 1, 1, 1, 1, 1]) == 0.0,
        Test([1], [1, 1, 1, 1, 1]) == 0.0,
        Test([1, 1, 1], [1, 1, 1]) == 0.0,
        Test([1], [1, 1, 2, 2, 2])**0.1,
        Test([1, 1], [1, 2, 2, 2]) == 0.25,
        Test([1, 1, 1], [2, 2, 2]) == 0.5,
        Test([1, 1, 1, 2], [2, 2]) == 0.25,
        Test([1, 1, 1], [1, 1, 2])**0.055555555555,
        Test([1, 2, 3], [1, 2, 3]) == 0.0,
        Test([1, 2, 3], [1, 2, 3, 4, 5])**0.03125,
        Test([1, 3], [2, 999]) == 0.25,
    )

    @staticmethod
    def simple1(arg1, arg2):
        """Return gini(arg1 + arg2) minus the two-sided Gini2 of the split.

        If either side is empty there is no real split and the gain is 0.0.
        ``Gini2`` is defined elsewhere; presumably it is the size-weighted
        impurity of the two sides -- TODO confirm.
        """
        combined_gini = Gini()
        split_gini = Gini2()
        sides = []
        for side in (arg1, arg2):
            if len(side):
                sides.append(as_num_array(side))
        if len(sides) == 2:
            return combined_gini(concatenate(sides)) - split_gini(*sides)
        return 0.0
예제 #27
0
class GetShift(EquivUnary):
  '''Shift a vector by a fixed offset, padding vacated slots with a filler.

    With a positive offset each output element holds the input value found
    ``offset`` positions earlier (offset=1 yields the previous value); with
    a negative offset it holds the value ``-offset`` positions later.
    Positions that have no source element receive ``filler``.

    Common keyword arguments of every implementation:
      offset -- shift distance (default 1); its magnitude is expected not
                to exceed len(arg), larger shifts are not guarded against
      filler -- value written into the vacated positions (default 0)
      out    -- optional preallocated output array; when None a new array
                is created with ``arg.new()``

    >>> func = GetShift()
    >>> assert func([1,3,6,7,7,9],filler=99).tolist() == [99,1,3,6,7,7]

  '''
  name = 'get_shift'
  ranking = ('smart','fast', 'naive_comp', 'naive_loop',)

  tests = (
    Test([0])                     == [0],
    Test([1,3,6,7,7,9])           == [0,1,3,6,7,7],
    Test([1,3,6,7,7,9],offset=3)  == [0,0,0,1,3,6,],
    Test([1,3,6,7,7,9],offset=-2) == [6,7,7,9,0,0],
    Test([1,3,6,7,7,9],filler=99) == [99,1,3,6,7,7],
  )

  @staticmethod
  def naive_loop(arg,offset=1,filler=0,out=None):
    '''Element-by-element implementation using explicit index loops.'''
    arg = as_num_array(arg)
    # Compare against None explicitly: the truth value of an array is
    # ambiguous, and an all-zero buffer must not be thrown away.
    if out is None:
      out = arg.new()
    if offset < 0:
      # shift towards the front: copy the tail forward, fill the end
      cut = len(arg)+offset
      for i in xrange(cut):
        out[i] = arg[i-offset]
      for i in xrange(cut,len(arg)):
        out[i] = filler
    else:
      # shift towards the back: fill the front, copy the head backward
      cut = offset
      for i in xrange(cut):
        out[i] = filler
      for i in xrange(cut,len(arg)):
        out[i] = arg[i-offset]
    return out

  @staticmethod
  def naive_comp(arg,offset=1,filler=0,out=None):
    '''Implementation assigning list comprehensions to output slices.'''
    arg = as_num_array(arg)
    # See naive_loop: only a missing buffer triggers allocation.
    if out is None:
      out = arg.new()
    if offset < 0:
      cut = len(arg)+offset
      out[:cut] = [arg[i] for i in xrange(-offset,len(arg))]
      out[cut:] = [filler]*(-offset)
    else:
      cut = offset
      out[:cut] = [filler]*(offset)
      out[cut:] = [arg[i] for i in xrange(len(arg)-cut)]
    return out

  @staticmethod
  def fast(arg,offset=1,filler=0,out=None):
    '''Implementation copying whole array slices instead of single items.'''
    arg = as_num_array(arg)
    if offset < 0:
      cut = len(arg)+offset
      out1 = arg[-offset:len(arg)]
      out2 = (-offset)*[filler]
    else:
      cut = offset
      out1 = offset*[filler]
      out2 = arg[:len(arg)-cut]
    # See naive_loop: only a missing buffer triggers allocation.
    if out is None:
      out = arg.new()
    out[:cut] = out1
    out[cut:] = out2
    return out

  @classmethod
  def smart(cls,arg,offset=1,filler=0,out=None):
    '''Use naive_loop for inputs shorter than 100 elements, else fast.'''
    if len(arg) < 100:
      return cls.naive_loop(arg,offset=offset,filler=filler,out=out)
    return cls.fast(arg,offset=offset,filler=filler,out=out)

  @staticmethod
  def _check_result(out,arg,**kwargs):
    '''Assert element by element that out is arg shifted as requested.

      Reads ``offset`` (default 1) and ``filler`` (default 0) from kwargs,
      raises AssertionError on the first mismatch and returns True
      otherwise.
    '''
    offset = kwargs.get('offset',1)
    filler = kwargs.get('filler',0)
    if offset < 0:
      cut = len(arg)+offset
      for i in xrange(cut):
        assert out[i] == arg[i-offset]
      for i in xrange(cut,len(arg)):
        assert out[i] == filler
    else:
      cut = offset
      for i in xrange(cut):
        assert out[i] == filler
      for i in xrange(cut,len(arg)):
        assert out[i] == arg[i-offset]
    return True
예제 #28
0
class LinearBins(EquivUnary):
  """Discretize values into a given number of equal-width bins.

    The result is either
      1) the bin number (0 to bins-1) of each value, when idx is true, or
      2) the midpoint of the bin each value falls into, otherwise.
    This is mainly a convenience wrapper around discretize().

  """

  ranking = ('wrapper',)

  tests = (
    # midpoint output
    Test([0]) == [0],
    Test([0,2],bins=1) == [1.0,1.0],
    Test([0,2],bins=2) == [0.5,1.5],
    Test([0,2],bins=3) ** [0.333333,1.666666],
    Test([0,1,2],bins=3) ** [0.333333,1.0,1.666666],
    Test([1,2,3,4,5],bins=3) ** [1.666666,1.666666,3.0,4.333333,4.333333],
    Test([1,2,1.3,1.4,1.33],bins=2) == [1.25,1.75,1.25,1.25,1.25],
    Test([1,2,1.3,1.4,1.33],bins=2,min=0.5) == [0.875,1.625,1.625,1.625,1.625],
    Test([1,2,1.3,1.4,1.33],bins=2,min=-1) == [1.25,1.25,1.25,1.25,1.25],

    # bin-number output
    Test([0],idx=True) == [0],
    Test([0,2],bins=1,idx=True) == [0,0],
    Test([0,2],bins=2,idx=True) == [0,1],
    Test([0,2],bins=3,idx=True) == [0,2],
    Test([0,1,2],bins=3,idx=True) == [0,1,2],
    Test([1,2,3,4,5],bins=3,idx=True) == [0,0,1,2,2],
    Test([1,2,1.3,1.4,1.33],bins=2,idx=True) == [0,1,0,0,0],
    Test([1,2,1.3,1.4,1.33],bins=2,min=0.5,idx=True) == [0,1,1,1,1],
    Test([1,2,1.3,1.4,1.33],bins=2,min=-1,idx=True) == [1,1,1,1,1],
    Test([100,75,50,25,2,1,0],bins=35,max=500,idx=True) == [6,5,3,1,0,0,0],
  )

  @staticmethod
  def wrapper(arg,bins=10,min=None,max=None,idx=None):
    """Build equal-width boundaries and delegate to discretize().

      min/max default to the smallest/largest value of arg.  A zero bin
      width short-circuits to a vector of zeros of the same length as arg.
      When idx is true no midpoint values are passed to discretize().
    """
    data = as_num_array(arg)
    lower = data.min() if min is None else min
    upper = data.max() if max is None else max
    width = (upper - lower) / bins
    if not width:
      return zeros(len(data))
    edges = arange(lower+width,upper+width,width)
    midpoints = None
    if not idx:
      # one midpoint per bin, i.e. one more than there are boundaries
      midpoints = arange(lower+(width/2),upper+width+width,width)[:len(edges)+1]
    return discretize(data,boundaries=edges,values=midpoints)
예제 #29
0
def run():
    """Print the module-level URL, then build a Test for it and call it."""
    print(URL)
    Test(URL)()
예제 #30
0
class LookAheadIndex(EquivUnary):
  """Given
        1) a vector of sorted values that may contain duplicates and
        irregular spacing (like event timestamps, for example), and
        2) a numeric delta to add to the vector (to identify the
        future timestamp some fixed interval away),
      return an index vector into the original data vector such that
      for each current value, the index points to either
        1) the first occurrence of current value+delta, or
        2) if the data doesn't contain such value, the first occurrence
        of the next lower value that does occur in the data.

    If delta==0, then the index points to the first/last occurrence of the
    current value in the data.

    The ``first`` keyword selects whether the first (True) or the last
    (False) index of the matching run of equal values is reported.

  """
  itypes = 'i'

  # NOTE: the second assignment overrides the first one, so only
  # ('missing1','naive_loop') takes effect.
  ranking = ('naive_loop',)
  ranking = ('missing1','naive_loop')
  bench_sizes = (1,2,3,4,5,10,20,50,100,200,300,500,1000,5000,
    #10000,
    #50000,
    #100000,
    #500000,
    #1000000,
  )


  tests = (
    # non-negative delta, first index of the matching run
    Test([0,0,1,1,1,2,3,5,9],delta=0,first=True)	== [0,0,2,2,2,5,6,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=1,first=True)	== [2,2,5,5,5,6,6,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=2,first=True)	== [5,5,6,6,6,6,7,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=3,first=True)	== [6,6,6,6,6,7,7,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=4,first=True)	== [6,6,7,7,7,7,7,8,8],
    Test([0,0,1,1,1,2,3,5,9],delta=5,first=True)	== [7,7,7,7,7,7,7,8,8],

    # non-negative delta, last index of the matching run
    Test([0,0,1,1,1,2,3,5,9],delta=0,first=False)	== [1,1,4,4,4,5,6,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=1,first=False)	== [4,4,5,5,5,6,6,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=2,first=False)	== [5,5,6,6,6,6,7,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=3,first=False)	== [6,6,6,6,6,7,7,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=4,first=False)	== [6,6,7,7,7,7,7,8,8],
    Test([0,0,1,1,1,2,3,5,9],delta=5,first=False)	== [7,7,7,7,7,7,7,8,8],

    # non-positive delta (looking backward), first index of the matching run
    Test([0,0,1,1,1,2,3,5,9],delta=-0,first=True)	== [0,0,2,2,2,5,6,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=-1,first=True)	== [0,0,0,0,0,2,5,6,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-2,first=True)	== [0,0,0,0,0,0,2,6,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-3,first=True)	== [0,0,0,0,0,0,0,5,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-4,first=True)	== [0,0,0,0,0,0,0,2,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-5,first=True)	== [0,0,0,0,0,0,0,0,6],

    # non-positive delta (looking backward), last index of the matching run
    Test([0,0,1,1,1,2,3,5,9],delta=-0,first=False)	== [1,1,4,4,4,5,6,7,8],
    Test([0,0,1,1,1,2,3,5,9],delta=-1,first=False)	== [1,1,1,1,1,4,5,6,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-2,first=False)	== [1,1,1,1,1,1,4,6,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-3,first=False)	== [1,1,1,1,1,1,1,5,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-4,first=False)	== [1,1,1,1,1,1,1,4,7],
    Test([0,0,1,1,1,2,3,5,9],delta=-5,first=False)	== [1,1,1,1,1,1,1,1,6],

    # short inputs
    Test([0],delta=-1)			== [],
    Test([1,5],delta=-3)		== [0,0],
    Test([0])				== [],
    Test([1,5],delta=3)			== [0,1],
  )

  def _prep_testdata(self,*args,**kwargs):
    """Return every positional input sorted and converted with as_num_array.

      The keyword arguments are accepted but not used or returned.
    """
    # benchmark for inputs that are already vectors
    # simplification for tests: dep == indep
    return [as_num_array(sorted(arg)) for arg in args]

  @staticmethod
  def missing1(arg,delta=1,first=False):
    """Lookup-table implementation.

      Builds a dict mapping each distinct value of arg to the first (or,
      when first is false, last) index of its run, then adds entries for
      every value+delta that does not occur in arg, and finally looks up
      value+delta for each element.  Returns a plain list of indices.

      arg is indexed at [0] and [-1], so it must be non-empty; the runs are
      taken from groupby(arg) without sorting, so arg is expected to be
      sorted already.
    """
    # build answer lookup mapping each arg value to first index
    run_lens = [(k,len(list(g))) for k,g in groupby(arg)]
    keys = as_num_array([k for k,l in run_lens])
    lens = as_num_array([l for k,l in run_lens])
    # ends[i] is one past the last index of run i, starts[i] its first index
    ends = cumsum(lens)
    starts = ends - lens
    if first:
      answer = dict(izip(keys,starts))
    else:
      answer = dict(izip(keys,ends-1))
    # identify missing keys
    need = keys + delta
    needset = set(need)
    haveset = set(answer)
    fillset = needset.difference(haveset)
    fill = as_num_array(sorted(fillset))
    #
    # (disabled debug output)
    #print
    #print 'haveset:', haveset
    #print 'need:', need
    #print 'fill:', fill
    #print 'answer1:', answer
    #
    minkey,maxkey = arg[0],arg[-1]
    #
    # Walk both the existing keys and the missing keys from largest to
    # smallest.  have_iter is shared between iterations, so each dropwhile
    # call resumes where the previous one stopped.
    have_iter = iter(keys[-1::-1])
    fill_iter = iter(fill[-1::-1])
    thiskey = maxkey
    thisval = answer[thiskey]
    for fillkey in fill_iter:
    #  print 'fillkey:', fillkey
      if thiskey >= fillkey:
        # advance to the largest existing key below fillkey; when none is
        # left, fall back to the smallest key
        try:
          thiskey = dropwhile(lambda x:x>=fillkey,have_iter).next()
        except StopIteration:
          thiskey = minkey
        thisval = answer[thiskey]
      # a missing key larger than every existing key keeps maxkey's answer
      answer[fillkey] = thisval
    #print 'answer2:', answer
    out = [answer[val+delta] for val in arg]
    return out


  @staticmethod
  def naive_loop(arg,delta=1,first=False):
    """Reference implementation scanning arg linearly for every element.

      For each element the scan first locates a position jj holding the
      resolved target value (forward for positive delta, backward for
      negative delta, the element itself for delta == 0), then walks to the
      first or last index of the run of equal values around jj.  Returns a
      plain list of indices.
    """
    out = []
    for i,val in enumerate(arg):
      # find answer range
      target = val + delta
      jj = i
      if target > val:
        # look forward
        for j in xrange(i+1,len(arg)):
          if arg[j] > target:
            break
        else:
          # nothing exceeds the target: use the last element
          j = len(arg)
        # step back to the last element that is <= target
        jj = j-1
        target = arg[jj]
      elif target < val:
        # look backward
        for j in xrange(i-1,-1,-1):
          if arg[j] <= target:
            break
        else:
          # nothing at or below the target: use the first element
          j = 0
        jj = j
        target = arg[jj]

      # find first or last answer within range
      if first:
        # default 0 applies when the run reaches the start of arg
        kk = 0
        for k in xrange(jj,-1,-1):
          if arg[k] != target:
            kk = k+1
            break
      else:
        # default applies when the run reaches the end of arg
        kk = len(arg)-1
        for k in xrange(jj,len(arg)):
          if arg[k] != target:
            kk = k-1
            break
      out.append(kk)
    return out