Python dataset Examples, mystic.math.legacydata.dataset Python Examples

Example #1

0

Show file

File: archive.py Project: vishalbelsare/mystic

def get_dist(archive, func, keymap=None):
    """get the graphical distance of func from data in archive

    Args:
        archive (klepto.archive): run archive (output of ``read``)
        func (function): with interface ``y = f(x)``, ``x`` is a list of floats
        keymap (klepto.keymap): keymap used for key encoding

    Returns:
        array of floats, graphical distance from func to each point in archive
    """
    from mystic.math.legacydata import dataset
    data = dataset()
    arxiv = getattr(archive, '__archive__', archive)
    if not len(arxiv):
        return
    #FIXME: need to implement generalized inverse of keymap
    #HACK: only works in certain special cases
    kind = getattr(keymap, '__stub__', '')
    if kind in ('encoding', 'algorithm'):  #FIXME always assumes used repr
        inv = lambda k: eval(k)
    elif kind in ('serializer', ):  #FIXME: ignores all config options
        import importlib
        inv = lambda k: importlib.import_module(keymap.__type__).loads(k)
    else:  #FIXME: give up, ignore keymap
        inv = lambda k: k
    y = ((inv(k), v) for k, v in arxiv.items())
    #HACK end
    import numpy as np
    y = np.array(list(y), dtype=object).T
    data.load(y[0].tolist(), y[1].tolist(), ids=list(range(len(y[0]))))
    from mystic.math.distance import graphical_distance
    return graphical_distance(func, data)

Example #2

0

Show file

File: dataset.py Project: vishalbelsare/mystic

def from_archive(cache, data=None, **kwds):
    '''convert a klepto.archive to a mystic.math.legacydata.dataset

    Inputs:
      cache: a klepto.archive instance
      data: a mystic.math.legacydata.dataset of i points, M inputs, N outputs
      axis: int, the desired index the tuple-valued dataset [0,N]
      ids: a list of ids for the data, or a function ids(x,y) to generate ids
    '''
    axis = kwds.get('axis', None)
    ids = kwds.get('ids', _iargsort)  #XXX: better None?
    import numpy as np  #FIXME: accept lipshitz coeffs as input
    if data is None:
        from mystic.math.legacydata import dataset
        data = dataset()
    # import klepto as kl
    # ca = kl.archives.dir_archive('__objective_5D_cache__', cached=False)
    # k = keymap=kl.keymaps.keymap()
    # c = kl.inf_cache(keymap=k, tol=1, ignore=('**','out'), cache=ca)
    # memo = c(lambda *args, **kwds: kwds['out'])
    # cache = memo.__cache__()
    y = np.array(list(cache.items()),
                 dtype=object).T  #NOTE: cache.items() slow
    if not len(y): return data
    if callable(ids):  #XXX: don't repeat tolist
        ids = ids(y[0].tolist(), y[1].tolist(), axis=axis)
    if axis is None:  #XXX: dataset should be single valued
        return data.load(y[0].tolist(), y[1].tolist(), ids=ids)
    return data.load(y[0].tolist(), [i[axis] for i in y[1]], ids=ids)

Example #3

0

Show file

File: discrete.py Project: agamdua/mystic

  def short_wrt_data(self, data, L=None, blamelist=False, pairs=True, \
                                         all=False, raw=False, **kwds):
    """check for shortness with respect to the given data

Inputs:
    data -- a collection of data points
    L -- the lipschitz constant, if different from that provided with data
    blamelist -- if True, report which points are infeasible
    pairs -- if True, report indicies of infeasible points
    all -- if True, report results for each point (opposed to all points)
    raw -- if True, report numerical results (opposed to boolean results)

Additional Inputs:
    tol -- maximum acceptable deviation from shortness
    cutoff -- zero out distances less than cutoff; typically cutoff = tol or 0.0

Notes:
    Each point x,y can be thought to have an associated double-cone with slope
    equal to the lipschitz constant. Shortness with respect to another point is
    defined by the first point not being inside the cone of the second. We can
    allow for some error in shortness, a short tolerance 'tol', for which the
    point x,y is some acceptable y-distance inside the cone. While very tightly
    related, cutoff and tol play distinct roles; tol is subtracted from
    calculation of the lipschitz_distance, while cutoff zeros out the value
    of any element less than the cutoff.
"""
    from mystic.math.legacydata import dataset 
    _self = dataset() 
    _self.load(self.positions, self.values)
    _self.lipschitz = data.lipschitz
    for i in range(len(_self)):
      _self[i].id = i
    return _self.short(data, L=L, blamelist=blamelist, pairs=pairs, \
                                               all=all, raw=raw, **kwds)

Example #4

0

Show file

  def short_wrt_data(self, data, L=None, blamelist=False, pairs=True, \
                                         all=False, raw=False, **kwds):
    """check for shortness with respect to the given data

Inputs:
    data -- a collection of data points
    L -- the lipschitz constant, if different from that provided with data
    blamelist -- if True, report which points are infeasible
    pairs -- if True, report indices of infeasible points
    all -- if True, report results for each point (opposed to all points)
    raw -- if True, report numerical results (opposed to boolean results)

Additional Inputs:
    tol -- maximum acceptable deviation from shortness
    cutoff -- zero out distances less than cutoff; typically cutoff = tol or 0.0

Notes:
    Each point x,y can be thought to have an associated double-cone with slope
    equal to the lipschitz constant. Shortness with respect to another point is
    defined by the first point not being inside the cone of the second. We can
    allow for some error in shortness, a short tolerance 'tol', for which the
    point x,y is some acceptable y-distance inside the cone. While very tightly
    related, cutoff and tol play distinct roles; tol is subtracted from
    calculation of the lipschitz_distance, while cutoff zeros out the value
    of any element less than the cutoff.
"""
    from mystic.math.legacydata import dataset 
    _self = dataset() 
    _self.load(self.positions, self.values)
    _self.lipschitz = data.lipschitz
    for i in range(len(_self)):
      _self[i].id = i
    return _self.short(data, L=L, blamelist=blamelist, pairs=pairs, \
                                               all=all, raw=raw, **kwds)

Example #5

0

Show file

File: discrete.py Project: agamdua/mystic

 def cost(rv):
   """compute cost from a 1-d array of model parameters,
   where:  cost = | sum(lipschitz_distance) | """
   _data = dataset()
   _pm = scenario()
   _pm.load(rv, pts)      # here rv is param: w,x,y
   if not long_form:
     positions = _pm.select(*range(npts))
   else: positions = _pm.positions
   _data.load( data.coords, data.values )                   # LOAD static
   if _self:
     _data.load( positions, _pm.values )                    # LOAD dynamic
   _data.lipschitz = data.lipschitz                         # LOAD L
   Rv = lipschitz_distance(_data.lipschitz, _pm, _data, tol=cutoff, **kwds)
   v = infeasibility(Rv, cutoff)
   return abs(sum(v))

Example #6

0

Show file

 def cost(rv):
   """compute cost from a 1-d array of model parameters,
   where:  cost = | sum(lipschitz_distance) | """
   _data = dataset()
   _pm = scenario()
   _pm.load(rv, pts)      # here rv is param: w,x,y
   if not long_form:
     positions = _pm.select(*range(npts))
   else: positions = _pm.positions
   _data.load( data.coords, data.values )                   # LOAD static
   if _self:
     _data.load( positions, _pm.values )                    # LOAD dynamic
   _data.lipschitz = data.lipschitz                         # LOAD L
   Rv = lipschitz_distance(_data.lipschitz, _pm, _data, tol=cutoff, **kwds)
   v = infeasibility(Rv, cutoff)
   return abs(sum(v))

Example #7

0

Show file

File: dataset.py Project: nadiiaaii/mystic

def _getitem(data, axis):
    """get the selected axis of the tuple-valued dataset

    Inputs:
      data: a mystic.math.legacydata.dataset of i points, M inputs, N outputs
      axis: int, the desired index the tuple-valued dataset [0,N]
    """
    if len(data.values) and type(data.values[0]) not in (tuple,list):
        msg = "cannot get axis %s for single-valued dataset.values" % axis
        raise ValueError(msg)
    # assumes axis is an int; select values corresponding to axis
    from mystic.math.legacydata import dataset, datapoint as datapt
    ds = dataset()
    for pt in data:
        ds.append(datapt(pt.position, pt.value[axis], pt.id, pt.cone.slopes))
    return ds

Example #8

0

Show file

File: discrete.py Project: agamdua/mystic

  def valid_wrt_model(self, model, blamelist=False, pairs=True, \
                                   all=False, raw=False, **kwds):
    """check for scenario validity with respect to the model

Inputs:
    model -- the model function, y' = F(x')
    blamelist -- if True, report which points are infeasible
    pairs -- if True, report indicies of infeasible points
    all -- if True, report results for each point (opposed to all points)
    raw -- if True, report numerical results (opposed to boolean results)

Additional Inputs:
    ytol -- maximum acceptable difference |y - F(x')|; a single value
    xtol -- maximum acceptable difference |x - x'|; an iterable or single value
    cutoff -- zero out distances less than cutoff; typically: ytol, 0.0, or None
    hausdorff -- norm; where if given, ytol = |y - F(x')| + |x - x'|/norm

Notes:
    xtol defines the n-dimensional base of a pilar of height ytol, centered at
    each point. The region inside the pilar defines the space where a "valid"
    model must intersect. If xtol is not specified, then the base of the pilar
    will be a dirac at x' = x. This function performs an optimization for each
    x to find an appropriate x'. While cutoff and ytol are very tightly related,
    they play a distinct role; ytol is used to set the optimization termination
    for an acceptable |y - F(x')|, while cutoff is applied post-optimization.
    If we are using the hausdorff norm, then ytol will set the optimization
    termination for an acceptable |y - F(x')| + |x - x'|/norm, where the x
    values are normalized by norm = hausdorff.
"""
    from mystic.math.legacydata import dataset 
    data = dataset() 
    data.load(self.positions, self.values)
   #data.lipschitz = L
    for i in range(len(data)):
      data[i].id = i
    return data.valid(model, blamelist=blamelist, pairs=pairs, \
                                       all=all, raw=raw, **kwds)

Example #9

0

Show file

  def valid_wrt_model(self, model, blamelist=False, pairs=True, \
                                   all=False, raw=False, **kwds):
    """check for scenario validity with respect to the model

Inputs:
    model -- the model function, y' = F(x')
    blamelist -- if True, report which points are infeasible
    pairs -- if True, report indices of infeasible points
    all -- if True, report results for each point (opposed to all points)
    raw -- if True, report numerical results (opposed to boolean results)

Additional Inputs:
    ytol -- maximum acceptable difference |y - F(x')|; a single value
    xtol -- maximum acceptable difference |x - x'|; an iterable or single value
    cutoff -- zero out distances less than cutoff; typically: ytol, 0.0, or None
    hausdorff -- norm; where if given, ytol = |y - F(x')| + |x - x'|/norm

Notes:
    xtol defines the n-dimensional base of a pilar of height ytol, centered at
    each point. The region inside the pilar defines the space where a "valid"
    model must intersect. If xtol is not specified, then the base of the pilar
    will be a dirac at x' = x. This function performs an optimization for each
    x to find an appropriate x'. While cutoff and ytol are very tightly related,
    they play a distinct role; ytol is used to set the optimization termination
    for an acceptable |y - F(x')|, while cutoff is applied post-optimization.
    If we are using the hausdorff norm, then ytol will set the optimization
    termination for an acceptable |y - F(x')| + |x - x'|/norm, where the x
    values are normalized by norm = hausdorff.
"""
    from mystic.math.legacydata import dataset 
    data = dataset() 
    data.load(self.positions, self.values)
   #data.lipschitz = L
    for i in range(len(data)):
      data[i].id = i
    return data.valid(model, blamelist=blamelist, pairs=pairs, \
                                       all=all, raw=raw, **kwds)

Example #10

0

Show file

File: dataset.py Project: nadiiaaii/mystic

def from_archive(cache, data=None, axis=None):
    '''convert a klepto.archive to a mystic.math.legacydata.dataset

    Inputs:
      cache: a klepto.archive instance
      data: a mystic.math.legacydata.dataset of i points, M inputs, N outputs
      axis: int, the desired index the tuple-valued dataset [0,N]
    '''
    import numpy as np
    if data is None:
        from mystic.math.legacydata import dataset
        data = dataset()
    # import klepto as kl
    # ca = kl.archives.dir_archive('__objective_5D_cache__', cached=False)
    # k = keymap=kl.keymaps.keymap()
    # c = kl.inf_cache(keymap=k, tol=1, ignore=('**','out'), cache=ca)
    # memo = c(lambda *args, **kwds: kwds['out'])
    # cache = memo.__cache__()
    y = np.array(list(cache.items()), dtype=object).T #NOTE: cache.items() slow
    if not len(y): return data
    ids = list(range(len(y[0])))
    if axis is None: #XXX: dataset should be single valued
        return data.load(y[0].tolist(), y[1].tolist(), ids=ids)
    return data.load(y[0].tolist(), [i[axis] for i in y[1]], ids=ids)

Example #11

0

Show file

File: distance.py Project: Magellen/mystic

def graphical_distance(model, points, **kwds):
  """find the radius(x') that minimizes the graph between reality, y = G(x),
and an approximating function, y' = F(x')

Inputs:
  model = the model function, y' = F(x'), that approximates reality, y = G(x)
  points = object of type 'datapoint' to validate against; defines y = G(x)

Additional Inputs:
  ytol = maximum acceptable difference |y - F(x')|; a single value
  xtol = maximum acceptable difference |x - x'|; an iterable or single value
  cutoff = zero out distances less than cutoff; typically: ytol, 0.0, or None
  hausdorff = norm; where if given, ytol = |y - F(x')| + |x - x'|/norm

Returns:
  radius = minimum distance from x,G(x) to x',F(x') for each x

Notes:
  xtol defines the n-dimensional base of a pilar of height ytol, centered at
  each point. The region inside the pilar defines the space where a "valid"
  model must intersect. If xtol is not specified, then the base of the pilar
  will be a dirac at x' = x. This function performs an optimization for each
  x to find an appropriate x'. While cutoff and ytol are very tightly related,
  they play a distinct role; ytol is used to set the optimization termination
  for an acceptable |y - F(x')|, while cutoff is applied post-optimization.
  If we are using the hausdorff norm, then ytol will set the optimization
  termination for an acceptable |y - F(x')| + |x - x'|/norm, where the x
  values are normalized by norm = hausdorff.
""" #FIXME: update docs to show normalization in y
 #NotImplemented:
 #L = list of lipschitz constants, for use when lipschitz metric is desired
 #constraints = constraints function for finding minimum distance
  from mystic.math.legacydata import dataset
  from numpy import asarray, sum, isfinite, zeros, seterr
  from mystic.solvers import diffev2, fmin_powell
  from mystic.monitors import Monitor, VerboseMonitor

  # ensure target xe and ye is a dataset
  target = dataset()
  target.load(*_get_xy(points))
  nyi = target.npts             # y's are target.values
  nxi = len(target.coords[-1])  # nxi = len(x) / len(y)
  
  # NOTE: the constraints function is a function over a single xe,ye
  #       because each underlying optimization is over a single xe,ye.
  #       thus, we 'pass' on using constraints at this time...
  constraints = None   # default is no constraints
  if 'constraints' in kwds: constraints = kwds.pop('constraints')
  if not constraints:  # if None (default), there are no constraints
    constraints = lambda x: x

  # get tolerance in y and wiggle room in x
  ytol = kwds.pop('ytol', 0.0)
  xtol = kwds.pop('xtol', 0.0) # default is to not allow 'wiggle room' in x 

  cutoff = ytol  # default is to zero out distances less than tolerance
  if 'cutoff' in kwds: cutoff = kwds.pop('cutoff')
  if cutoff is True: cutoff = ytol
  elif cutoff is False: cutoff = None
  ipop = kwds.pop('ipop', min(20, 3*nxi)) #XXX: tune ipop?
  imax = kwds.pop('imax', 1000) #XXX: tune imax?

  # get range for the dataset (normalization for hausdorff distance)
  hausdorff = kwds.pop('hausdorff', False)
  if not hausdorff:  # False, (), None, ...
    ptp = [0.0]*nxi
    yptp = 1.0
  elif hausdorff is True:
    from mystic.math.measures import spread
    ptp = [spread(xi) for xi in zip(*target.coords)]
    yptp = spread(target.values)  #XXX: this can lead to bad bad things...
  else:
    try: #iterables
      if len(hausdorff) < nxi+1:
        hausdorff = list(hausdorff) + [0.0]*(nxi - len(hausdorff)) + [1.0]
      ptp = hausdorff[:-1]  # all the x
      yptp = hausdorff[-1]  # just the y
    except TypeError: #non-iterables
      ptp = [hausdorff]*nxi
      yptp = hausdorff

  #########################################################################
  def radius(model, point, ytol=0.0, xtol=0.0, ipop=None, imax=None):
    """graphical distance between a single point x,y and a model F(x')"""
    # given a single point x,y: find the radius = |y - F(x')| + delta
    # radius is just a minimization over x' of |y - F(x')| + delta
    # where we apply a constraints function (of box constraints) of
    # |x - x'| <= xtol  (for each i in x)
    #
    # if hausdorff = some iterable, delta = |x - x'|/hausdorff
    # if hausdorff = True, delta = |x - x'|/spread(x); using the dataset range
    # if hausdorff = False, delta = 0.0
    #
    # if ipop, then DE else Powell; ytol is used in VTR(ytol)
    # and will terminate when cost <= ytol
    x,y = _get_xy(point)
    y = asarray(y)
    # catch cases where yptp or y will cause issues in normalization
   #if not isfinite(yptp): return 0.0 #FIXME: correct?  shouldn't happen
   #if yptp == 0: from numpy import inf; return inf #FIXME: this is bad

    # build the cost function
    if hausdorff: # distance in all directions
      def cost(rv):
        '''cost = |y - F(x')| + |x - x'| for each x,y (point in dataset)'''
        _y = model(rv)
        if not isfinite(_y): return abs(_y)
        errs = seterr(invalid='ignore', divide='ignore') # turn off warning 
        z = abs((asarray(x) - rv)/ptp)  # normalize by range
        m = abs(y - _y)/yptp            # normalize by range
        seterr(invalid=errs['invalid'], divide=errs['divide']) # turn on warning
        return m + sum(z[isfinite(z)])
    else:  # vertical distance only
      def cost(rv):
        '''cost = |y - F(x')| for each x,y (point in dataset)'''
        return abs(y - model(rv))

    if debug:
      print("rv: %s" % str(x))
      print("cost: %s" % cost(x))

    # if xtol=0, radius is difference in x,y and x,F(x); skip the optimization
    try:
      if not imax or not max(xtol): #iterables
        return cost(x)
    except TypeError:
      if not xtol: #non-iterables
        return cost(x)

    # set the range constraints
    xtol = asarray(xtol)
    bounds = list(zip( x - xtol, x + xtol ))

    if debug:
      print("lower: %s" % str(zip(*bounds)[0]))
      print("upper: %s" % str(zip(*bounds)[1]))

    # optimize where initially x' = x
    stepmon = Monitor()
    if debug: stepmon = VerboseMonitor(1)
    #XXX: edit settings?
    MINMAX = 1 #XXX: confirm MINMAX=1 is minimization
    ftol = ytol
    gtol = None  # use VTRCOG
    if ipop:
      results = diffev2(cost, bounds, ipop, ftol=ftol, gtol=gtol, \
                        itermon = stepmon, maxiter=imax, bounds=bounds, \
                        full_output=1, disp=0, handler=False)
    else:
      results = fmin_powell(cost, x, ftol=ftol, gtol=gtol, \
                            itermon = stepmon, maxiter=imax, bounds=bounds, \
                            full_output=1, disp=0, handler=False)
   #solved = results[0]            # x'
    func_opt = MINMAX * results[1] # cost(x')
    if debug:
      print("solved: %s" % results[0])
      print("cost: %s" % func_opt)

    # get the minimum distance |y - F(x')|
    return func_opt
   #return results[0], func_opt
  #########################################################################

  #XXX: better to do a single optimization rather than for each point ???
  d = [radius(model, point, ytol, xtol, ipop, imax) for point in target]
  return infeasibility(d, cutoff)

Example #12

0

Show file

File: distance.py Project: Magellen/mystic

  pm = scenario()
  pm.load(param1, pts)
  print("pm.wts: %s" % str(pm.wts))
  print("pm.pos: %s" % str(pm.pos))
  W = pm.weights
  X = pm.coords
  Y = pm.values
  print("pm.weights: %s" % str(W))
  print("pm.coords: %s" % str(X))
  print("pm.values: %s" % str(Y))

  print("\nbuilding a dataset from the scenario...")
  # build a dataset (using X,Y)
  # [store W as 'weights' ?]
  from mystic.math.legacydata import dataset
  d = dataset()
  d.load(X, Y) 
  print("d.coords: %s" % str(d.coords))
  print("d.values: %s" % str(d.values))

  print("\nedit the dataset...")
  d[0].value = 0
  print("d.values: %s" % str(d.values))
  # DON'T EDIT d[0].position... IT BREAKS PRODUCT MEASURE!!!

  print("\nupdate the scenario from the dataset...")
  # update pm(w,x,Y | W,X) from dataset(X,Y)
  pm.coords, pm.values = d.fetch()
  print("then, build a new list of params from the scenario...")
  # convert pm(w,x,Y | W,X) to params(w,x,Y)
  param1 = pm.flatten(all=True)

Example #13

0

Show file

File: support_hypercube_scenario.py Project: agamdua/mystic

    filter = None

  try: # select the scenario dimensions
    npts = eval(parsed_opts.dim)  # format is "(1,1,1)"
    if npts is None: # npts may have been logged
      exec "from %s import npts" % file
  except:
    npts = (1,1,1) #XXX: better in parsed_args ?

  try: # get the name of the dataset file
    file = parsed_args[1]
    from mystic.math.legacydata import load_dataset
    data = load_dataset(file, filter)
  except:
#   raise IOError, "please provide dataset file name"
    data = dataset()
    cones = False
    legacy = False

  try: # select the bounds
    _bounds = parsed_opts.bounds
    if _bounds[-1] == "*": #XXX: then bounds are NOT strictly enforced
      _bounds = _bounds[:-1]
      strict = False
    else:
      strict = True
    bounds = eval(_bounds)  # format is "[(60,105),(0,30),(2.1,2.8)]"
  except:
    strict = True
    bounds = [(0,1),(0,1),(0,1)]

Example #14

0

Show file

        filter = None

    try:  # select the scenario dimensions
        npts = eval(parsed_opts.dim)  # format is "(1,1,1)"
        if npts is None:  # npts may have been logged
            exec "from %s import npts" % file
    except:
        npts = (1, 1, 1)  #XXX: better in parsed_args ?

    try:  # get the name of the dataset file
        file = parsed_args[1]
        from mystic.math.legacydata import load_dataset
        data = load_dataset(file, filter)
    except:
        #   raise IOError, "please provide dataset file name"
        data = dataset()
        cones = False
        legacy = False

    try:  # select the bounds
        _bounds = parsed_opts.bounds
        if _bounds[-1] == "*":  #XXX: then bounds are NOT strictly enforced
            _bounds = _bounds[:-1]
            strict = False
        else:
            strict = True
        bounds = eval(_bounds)  # format is "[(60,105),(0,30),(2.1,2.8)]"
    except:
        strict = True
        bounds = [(0, 1), (0, 1), (0, 1)]

Example #15

0

Show file

File: distance.py Project: kashenfelter/mystic

  pm = scenario()
  pm.load(param1, pts)
  print("pm.wts: %s" % str(pm.wts))
  print("pm.pos: %s" % str(pm.pos))
  W = pm.weights
  X = pm.coords
  Y = pm.values
  print("pm.weights: %s" % str(W))
  print("pm.coords: %s" % str(X))
  print("pm.values: %s" % str(Y))

  print("\nbuilding a dataset from the scenario...")
  # build a dataset (using X,Y)
  # [store W as 'weights' ?]
  from mystic.math.legacydata import dataset
  d = dataset()
  d.load(X, Y) 
  print("d.coords: %s" % str(d.coords))
  print("d.values: %s" % str(d.values))

  print("\nedit the dataset...")
  d[0].value = 0
  print("d.values: %s" % str(d.values))
  # DON'T EDIT d[0].position... IT BREAKS PRODUCT MEASURE!!!

  print("\nupdate the scenario from the dataset...")
  # update pm(w,x,Y | W,X) from dataset(X,Y)
  pm.coords, pm.values = d.fetch()
  print("then, build a new list of params from the scenario...")
  # convert pm(w,x,Y | W,X) to params(w,x,Y)
  param1 = pm.flatten(all=True)

Example #16

0

Show file

File: discrete.py Project: agamdua/mystic

dirac_measure = measure


if __name__ == '__main__':
  from mystic.math.distance import *
  model = lambda x:sum(x)
  a = [0,1,9,8, 1,0,4,6, 1,0,1,2, 0,1,2,3,4,5,6,7]
  feasability = 0.0; deviation = 0.01
  validity = 5.0; wiggle = 1.0
  y_mean = 5.0; y_buffer = 0.0
  L = [.75,.5,.25]
  bc = [(0,7,2),(3,0,2),(2,0,3),(1,0,3),(2,4,2)]
  bv = [5,3,1,4,8]
  pts = (2,2,2)
  from mystic.math.legacydata import dataset
  data = dataset()
  data.load(bc, bv)
  data.lipschitz = L
  pm = scenario()
  pm.load(a, pts)
  pc = pm.positions
  pv = pm.values
  #---
  _data = dataset()
  _data.load(bc, bv)
  _data.load(pc, pv)
  _data.lipschitz = data.lipschitz
  from numpy import sum
  ans = sum(lipschitz_distance(L, pm, _data))
  print "original: %s @ %s\n" % (ans, a)
 #print "pm: %s" % pm

Example #17

0

Show file

File: distance.py Project: kashenfelter/mystic

def graphical_distance(model, points, **kwds):
  """find the ``radius(x')`` that minimizes the graph between reality (data),
``y = G(x)``, and an approximating function, ``y' = F(x')``.

Args:
    model (func): a model ``y' = F(x')`` that approximates reality ``y = G(x)``
    points (mystic.math.legacydata.dataset): a dataset, defines ``y = G(x)``
    ytol (float, default=0.0): maximum acceptable difference ``|y - F(x')|``.
    xtol (float, default=0.0): maximum acceptable difference ``|x - x'|``.
    cutoff (float, default=ytol): zero out distances less than cutoff.
    hausdorff (bool, default=False): hausdorff ``norm``, where if given,
        then ``ytol = |y - F(x')| + |x - x'|/norm``.

Returns:
    the radius (the minimum distance ``x,G(x)`` to ``x',F(x')`` for each ``x``)

Notes:
    *points* can be a ``mystic.math.legacydata.dataset`` or a list of
    ``mystic.math.legacydata.datapoint`` objects.

    *xtol* defines the n-dimensional base of a pilar of height *ytol*,
    centered at each point. The region inside the pilar defines the space
    where a "valid" model must intersect. If *xtol* is not specified, then
    the base of the pilar will be a dirac at ``x' = x``. This function
    performs an optimization for each ``x`` to find an appropriate ``x'``.

    *ytol* is a single value, while *xtol* is a single value or an iterable.
    *cutoff* takes a float or a boolean, where ``cutoff=True`` will set the
    value of *cutoff* to the default. Typically, the value of *cutoff* is
    *ytol*, 0.0, or None. *hausdorff* can be False (e.g. ``norm = 1.0``),
    True (e.g. ``norm = spread(x)``), or a list of points of ``len(x)``.

    While *cutoff* and *ytol* are very tightly related, they play a distinct
    role; *ytol* is used to set the optimization termination for an acceptable
    ``|y - F(x')|``, while *cutoff* is applied post-optimization.

    If we are using the hausdorff norm, then *ytol* will set the optimization
    termination for an acceptable ``|y - F(x')| + |x - x'|/norm``, where the
    ``x`` values are normalized by ``norm = hausdorff``.
""" #FIXME: update docs to show normalization in y
 #NotImplemented:
 #L = list of lipschitz constants, for use when lipschitz metric is desired
 #constraints = constraints function for finding minimum distance
  from mystic.math.legacydata import dataset
  from numpy import asarray, sum, isfinite, zeros, seterr
  from mystic.solvers import diffev2, fmin_powell
  from mystic.monitors import Monitor, VerboseMonitor

  # ensure target xe and ye is a dataset
  target = dataset()
  target.load(*_get_xy(points))
  nyi = target.npts             # y's are target.values
  nxi = len(target.coords[-1])  # nxi = len(x) / len(y)
  
  # NOTE: the constraints function is a function over a single xe,ye
  #       because each underlying optimization is over a single xe,ye.
  #       thus, we 'pass' on using constraints at this time...
  constraints = None   # default is no constraints
  if 'constraints' in kwds: constraints = kwds.pop('constraints')
  if not constraints:  # if None (default), there are no constraints
    constraints = lambda x: x

  # get tolerance in y and wiggle room in x
  ytol = kwds.pop('ytol', 0.0)
  xtol = kwds.pop('xtol', 0.0) # default is to not allow 'wiggle room' in x 

  cutoff = ytol  # default is to zero out distances less than tolerance
  if 'cutoff' in kwds: cutoff = kwds.pop('cutoff')
  if cutoff is True: cutoff = ytol
  elif cutoff is False: cutoff = None
  ipop = kwds.pop('ipop', min(20, 3*nxi)) #XXX: tune ipop?
  imax = kwds.pop('imax', 1000) #XXX: tune imax?

  # get range for the dataset (normalization for hausdorff distance)
  hausdorff = kwds.pop('hausdorff', False)
  if not hausdorff:  # False, (), None, ...
    ptp = [0.0]*nxi
    yptp = 1.0
  elif hausdorff is True:
    from mystic.math.measures import spread
    ptp = [spread(xi) for xi in zip(*target.coords)]
    yptp = spread(target.values)  #XXX: this can lead to bad bad things...
  else:
    try: #iterables
      if len(hausdorff) < nxi+1:
        hausdorff = list(hausdorff) + [0.0]*(nxi - len(hausdorff)) + [1.0]
      ptp = hausdorff[:-1]  # all the x
      yptp = hausdorff[-1]  # just the y
    except TypeError: #non-iterables
      ptp = [hausdorff]*nxi
      yptp = hausdorff

  #########################################################################
  def radius(model, point, ytol=0.0, xtol=0.0, ipop=None, imax=None):
    """graphical distance between a single point x,y and a model F(x')"""
    # given a single point x,y: find the radius = |y - F(x')| + delta
    # radius is just a minimization over x' of |y - F(x')| + delta
    # where we apply a constraints function (of box constraints) of
    # |x - x'| <= xtol  (for each i in x)
    #
    # if hausdorff = some iterable, delta = |x - x'|/hausdorff
    # if hausdorff = True, delta = |x - x'|/spread(x); using the dataset range
    # if hausdorff = False, delta = 0.0
    #
    # if ipop, then DE else Powell; ytol is used in VTR(ytol)
    # and will terminate when cost <= ytol
    x,y = _get_xy(point)
    y = asarray(y)
    # catch cases where yptp or y will cause issues in normalization
   #if not isfinite(yptp): return 0.0 #FIXME: correct?  shouldn't happen
   #if yptp == 0: from numpy import inf; return inf #FIXME: this is bad

    # build the cost function
    if hausdorff: # distance in all directions
      def cost(rv):
        '''cost = |y - F(x')| + |x - x'| for each x,y (point in dataset)'''
        _y = model(rv)
        if not isfinite(_y): return abs(_y)
        errs = seterr(invalid='ignore', divide='ignore') # turn off warning 
        z = abs((asarray(x) - rv)/ptp)  # normalize by range
        m = abs(y - _y)/yptp            # normalize by range
        seterr(invalid=errs['invalid'], divide=errs['divide']) # turn on warning
        return m + sum(z[isfinite(z)])
    else:  # vertical distance only
      def cost(rv):
        '''cost = |y - F(x')| for each x,y (point in dataset)'''
        return abs(y - model(rv))

    if debug:
      print("rv: %s" % str(x))
      print("cost: %s" % cost(x))

    # if xtol=0, radius is difference in x,y and x,F(x); skip the optimization
    try:
      if not imax or not max(xtol): #iterables
        return cost(x)
    except TypeError:
      if not xtol: #non-iterables
        return cost(x)

    # set the range constraints
    xtol = asarray(xtol)
    bounds = list(zip( x - xtol, x + xtol ))

    if debug:
      print("lower: %s" % str(zip(*bounds)[0]))
      print("upper: %s" % str(zip(*bounds)[1]))

    # optimize where initially x' = x
    stepmon = Monitor()
    if debug: stepmon = VerboseMonitor(1)
    #XXX: edit settings?
    MINMAX = 1 #XXX: confirm MINMAX=1 is minimization
    ftol = ytol
    gtol = None  # use VTRCOG
    if ipop:
      results = diffev2(cost, bounds, ipop, ftol=ftol, gtol=gtol, \
                        itermon = stepmon, maxiter=imax, bounds=bounds, \
                        full_output=1, disp=0, handler=False)
    else:
      results = fmin_powell(cost, x, ftol=ftol, gtol=gtol, \
                            itermon = stepmon, maxiter=imax, bounds=bounds, \
                            full_output=1, disp=0, handler=False)
   #solved = results[0]            # x'
    func_opt = MINMAX * results[1] # cost(x')
    if debug:
      print("solved: %s" % results[0])
      print("cost: %s" % func_opt)

    # get the minimum distance |y - F(x')|
    return func_opt
   #return results[0], func_opt
  #########################################################################

  #XXX: better to do a single optimization rather than for each point ???
  d = [radius(model, point, ytol, xtol, ipop, imax) for point in target]
  return infeasibility(d, cutoff)

Example #18

0

Show file

dirac_measure = measure


if __name__ == '__main__':
  from mystic.math.distance import *
  model = lambda x:sum(x)
  a = [0,1,9,8, 1,0,4,6, 1,0,1,2, 0,1,2,3,4,5,6,7]
  feasability = 0.0; deviation = 0.01
  validity = 5.0; wiggle = 1.0
  y_mean = 5.0; y_buffer = 0.0
  L = [.75,.5,.25]
  bc = [(0,7,2),(3,0,2),(2,0,3),(1,0,3),(2,4,2)]
  bv = [5,3,1,4,8]
  pts = (2,2,2)
  from mystic.math.legacydata import dataset
  data = dataset()
  data.load(bc, bv)
  data.lipschitz = L
  pm = scenario()
  pm.load(a, pts)
  pc = pm.positions
  pv = pm.values
  #---
  _data = dataset()
  _data.load(bc, bv)
  _data.load(pc, pv)
  _data.lipschitz = data.lipschitz
  from numpy import sum
  ans = sum(lipschitz_distance(L, pm, _data))
  print "original: %s @ %s\n" % (ans, a)
 #print "pm: %s" % pm