예제 #1
0
파일: rv.py 프로젝트: Bhumbra/probayes
  def evaluate(self, values, use_pfun=True):
    """ Evaluates or samples value(s) belonging to the domain of the variable.

    :param values: None, set of a single integer, array, or scalar.
    :param use_pfun: boolean flag to permit sampling through pfun if it is set.

    :return: a Distribution when sampling through pfun or through a Bernoulli
             sfun, otherwise the result of the parent class evaluate().
    """
    via_pfun = use_pfun and self._pfun is not None and isunitsetint(values)

    # Sampling through pfun: draw uniformly within the transformed limits and
    # map the draws back through the second pfun callable
    if via_pfun:
      count = list(values)[0]
      assert self.isfinite, \
          "Cannot evaluate {} values for bounds: {}".format(values, self._ulims)
      cdf_lims = self.pfun[0](self._ulims)
      cdf_lo, cdf_hi = cdf_lims[0], cdf_lims[1]
      lo_is_tuple = isinstance(self._vset[0], tuple)
      hi_is_tuple = isinstance(self._vset[1], tuple)
      cdf_vals = uniform(cdf_lo, cdf_hi, count, lo_is_tuple, hi_is_tuple)
      return Distribution(self._name, {self.name: self.pfun[1](cdf_vals)})

    # Without pfun, boolean variables whose sfun expression is bernoulli_sfun
    # are sampled through sfun when the requested count is zero or negative
    bool_with_expr = self._vtype in VTYPES[bool] and hasattr(self._sfun, 'expr')
    if bool_with_expr and isunitsetint(values) \
        and self._sfun.expr == bernoulli_sfun:
      count = list(values)[0]
      if not count or count < 0:
        if count:
          count = -count
        return Distribution(self._name,
                            {self.name: self._sfun[None](count)})

    # Everything else is delegated to the parent class
    return super().evaluate(values)
예제 #2
0
 def interp(self, *args, cond_pdf=False):
     """ Samples one variable conditioned on fixed values of the others.

     :param args: one entry per variable in the order of the means; exactly one
                  entry must be a unit set holding an integer (the variable to
                  sample), the others are the conditioning values.
     :param cond_pdf: boolean flag to also return the normal density of the
                      sampled values.

     :return: the sampled values, or (values, densities) if cond_pdf is True.

     :raises ValueError: if more than one argument is a unit set integer.
     """
     # args in order of mean - one must be a unitsetint
     idx = None
     dmu = np.empty((self._n, 1), dtype=float)
     for i, arg in enumerate(args):
         if isunitsetint(arg):
             if idx is not None:
                 raise ValueError(
                     "Only one argument can be interpolated at a time")
             idx = i
         else:
             dmu[i] = arg - self._mean[i]
     assert idx is not None, "No variable specified for interpolation"

     # Drop the (unset) row belonging to the sampled variable
     dmu = np.delete(dmu, (idx), axis=0)
     lims = self._cdfs[idx]
     number = list(args[idx])[0]
     cdf = uniform(lims[0], lims[1], number)

     # The dot product with the 2-D dmu is a one-element array; extract the
     # scalar explicitly because float() on arrays with ndim > 0 is deprecated
     shift = np.asarray(self._coef[idx].dot(dmu)).item()
     mean = self._mean[idx] + float(shift)
     stdv = self._stdv[idx]
     vals = scipy.stats.norm.ppf(cdf, loc=mean, scale=stdv)
     if not cond_pdf:
         return vals
     return vals, scipy.stats.norm.pdf(vals, loc=mean, scale=stdv)
예제 #3
0
파일: field.py 프로젝트: Bhumbra/probayes
    def eval_step(self, pred_vals, succ_vals, reverse=False):
        """ Returns the values, dimensions, and keywords for a deterministic step.

        :param pred_vals: dictionary of predecessor values keyed by variable name.
        :param succ_vals: successor specification: None, a delta specification
                          (Expression, tuple, or instance of the delta type), or
                          a dictionary of successor values.
        :param reverse: boolean flag passed through in the returned keywords.

        :return vals: an OrderedDict with the predecessor values under each key
                      and, when stepping, successor values under key + "'".
        :return dims: an empty dictionary.
        :return kwargs: a dictionary holding the reverse flag.

        :raises ValueError: if the step would require stochastic sampling.
        """

        # Evaluate deltas if required
        if succ_vals is None:
            if self._delta is None:
                if all(isscalar(pred_value)
                       for pred_value in pred_vals.values()):
                    raise ValueError(
                        "Stochastic step sampling not supported for Field; use RF"
                    )
                succ_vals = pred_vals
            else:
                succ_vals = self.eval_delta()
        elif isinstance(succ_vals, Expression) or \
            isinstance(succ_vals, (tuple, self._delta_type)):
            succ_vals = self.eval_delta(succ_vals)

        # Apply deltas
        if isinstance(succ_vals, self._delta_type):
            succ_vals = self.apply_delta(pred_vals, succ_vals)
        elif isunitsetint(succ_vals):
            raise ValueError(
                "Stochastic step sampling not supported for Field; use RF")

        # Initialise outputs with predecessor values
        dims = {}
        kwargs = {'reverse': reverse}
        vals = collections.OrderedDict(
            (key, pred_vals[key]) for key in self._keylist)
        if succ_vals is None:
            return vals, dims, kwargs

        # If stepping, add successor values; a primed key in succ_vals takes
        # precedence over the unprimed one
        for key in self._keylist:
            mod_key = key + "'"
            succ_key = mod_key if mod_key in succ_vals else key
            vals[mod_key] = succ_vals[succ_key]

        return vals, dims, kwargs
예제 #4
0
파일: field.py 프로젝트: Bhumbra/probayes
    def parse_args(self, *args, **kwds):
        """ Returns a values dictionary parsed from *args and **kwds.

        Keys joined by commas (e.g. 'x,y') specify several variables at once:
        a tuple or list value must supply one entry per sub-key, and any other
        value is repeated once per sub-key.

        :param args: positional arguments passed to parse_as_str_dict().
        :param kwds: keyword arguments passed to parse_as_str_dict(), except
                     for the optional boolean 'pass_all' (default False) which
                     disables key checking and instead discards entries whose
                     key is not in the key list.

        :return: the values dictionary, with a default entry added for every
                 key of the key list that was not specified.

        :raises AssertionError: for a mismatch between the number of sub-keys
                and values, or (unless pass_all) for an unrecognised key.
        """
        pass_all = kwds.pop('pass_all', False)
        values = parse_as_str_dict(*args, **kwds)
        seen_keys = []
        for key, val in values.items():
            sub_keys = key.split(',')
            seen_keys.extend(sub_keys)
            if len(sub_keys) > 1:
                if isinstance(val, (tuple, list)):
                    assert len(val) == len(sub_keys), \
                        "Mismatch in key specification {} and number of values {}".\
                        format(key, len(val))
                else:
                    # Replacing the value of an existing key is safe while
                    # iterating since the dictionary size does not change
                    values.update({key: [val] * len(sub_keys)})
            if not pass_all:
                # Check every sub-key, not only the last of a joined key
                for sub_key in sub_keys:
                    assert sub_key in self._keyset, \
                        "Unrecognised key {} among available Variables {}".format(
                            sub_key, self._keyset)

        # Default values: {0} when any variable is float, unless some
        # specified value is already the unit set {0}
        def_val = None
        if self._anyfloat:
            zero_set_given = any(isunitsetint(val) and val == {0}
                                 for val in values.values())
            def_val = None if zero_set_given else {0}
        for key in self._keylist:
            if key not in seen_keys:
                values.update({key: def_val})

        # When passing all, discard the entries not belonging to the key list
        if pass_all:
            for key in list(values.keys()):
                if key not in self._keylist:
                    values.pop(key)

        return values
예제 #5
0
    def eval_step(self, pred_vals, succ_vals, reverse=False):
        """ Returns the values, dimensions, and keywords for a step, sampling
        the successor values if they are requested as a unit set integer.

        :param pred_vals: dictionary of predecessor values keyed by variable name.
        :param succ_vals: successor specification; None with no delta set and
                          all-scalar predecessors is treated as {0}.
        :param reverse: boolean flag passed through in the returned keywords.

        :return vals: an OrderedDict with the predecessor values under each key
                      and successor values under key + "'".
        :return dims: a dictionary of dimensions.
        :return kwargs: a dictionary of keywords including the reverse flag.

        :raises ValueError: if sampling is requested without a callable tfun
                and cannot be delegated to a single variable.
        """
        if succ_vals is None and self._delta is None:
            # Test the predecessor values themselves; iterating the dictionary
            # directly would test its keys instead
            if all(isscalar(pred_value) for pred_value in pred_vals.values()):
                succ_vals = {0}

        # If not sampling succeeding values, use deterministic call
        if not isunitsetint(succ_vals):
            return super().eval_step(pred_vals, succ_vals, reverse=reverse)

        if self._tfun is not None and self._tfun.callable:
            succ_vals = self.eval_tfun(pred_vals)
        elif self._nvars == 1:
            # A single variable may be able to sample the step by itself
            var = self._varlist[0]
            tran = var.tran
            tfun = var.tfun
            if (tran is not None and not tran.callable) or \
                (tfun is not None and tfun.callable):
                vals, dims, kwargs = var.eval_step(pred_vals[var.name],
                                                   succ_vals,
                                                   reverse=reverse)
                return vals, dims, kwargs
            raise ValueError("Transitional CDF calling requires callable tfun")
        else:
            raise ValueError("Transitional CDF calling requires callable tfun")

        # Initialise outputs with predecessor values
        dims = {}
        kwargs = {'reverse': reverse}
        vals = collections.OrderedDict()
        for key in self._keylist:
            vals.update({key: pred_vals[key]})
        if succ_vals is None and self._tran is None:
            return vals, dims, kwargs

        # If stepping or have a transition function, add successor values
        for key in self._keylist:
            mod_key = key + "'"
            succ_key = key if mod_key not in succ_vals else mod_key
            vals.update({key + "'": succ_vals[succ_key]})

        return vals, dims, kwargs
예제 #6
0
def product(*args, **kwds):
  """ Multiplies two or more PDs subject to the following:
  1. They must not share the same marginal variables, except in the one case
     supported below: all distributions have identical marginal and
     conditional keys and the same marginals satisfy isunitsetint().
  2. Conditional variables must be identical unless contained as marginal from
     another distribution.

  :param args: the distributions to multiply; each is read through its pscale,
               issingleton, prob, marg, cond, and dims attributes and through
               dictionary-style access to its values.
  :param kwds: optional keywords; only 'pscale' is read here.

  :return: None if no distributions are given, otherwise a PD instance.
  """
  from probayes.pd import PD

  # Check pscales, scalars, possible fasttrack
  if not len(args):
    return None
  kwds = dict(kwds)
  pscales = [arg.pscale for arg in args]
  # A missing or falsy 'pscale' keyword falls back to prod_pscale(pscales)
  pscale = kwds.get('pscale', None) or prod_pscale(pscales)
  aresingleton = [arg.issingleton for arg in args]
  maybe_fasttrack = all(aresingleton) and \
                    np.all(pscale == np.array(pscales)) and \
                    pscale in [0, 1.]


  # Collate vals, probs, marg_names, and cond_names as lists
  vals = [collections.OrderedDict(arg) for arg in args]
  probs = [arg.prob for arg in args]
  marg_names = [list(arg.marg.values()) for arg in args]
  cond_names = [list(arg.cond.values()) for arg in args]

  # Detect uniqueness in marginal keys and identical conditionals
  all_marg_keys = []
  for arg in args:
    all_marg_keys.extend(list(arg.marg.keys()))
  marg_sets = None
  if len(all_marg_keys) != len(set(all_marg_keys)):
    # Repeated marginal keys: marg_keys is reset to None (failing the assertion
    # below) as soon as a distribution differs in its marginal keys, its
    # conditional keys, or its pattern of unit-set-integer marginals
    marg_keys, cond_keys, marg_sets, = None, None, None
    for arg in args:
      if marg_keys is None:
        marg_keys = list(arg.marg.keys())
      elif marg_keys != list(arg.marg.keys()):
        marg_keys = None
        break
      if cond_keys is None:
        cond_keys = list(arg.cond.keys())
      elif cond_keys != list(arg.cond.keys()):
        marg_keys = None
        break
      if marg_keys:  
        are_marg_sets = np.array([isunitsetint(arg[marg_key]) for
                                  marg_key in marg_keys])
        if marg_sets is None:
          # The first distribution must have at least one unit-set marginal
          if np.any(are_marg_sets):
            marg_sets = are_marg_sets
          else:
            marg_keys = None
            break
        elif not np.all(marg_sets == are_marg_sets):
          marg_keys = None
          break
    assert marg_keys is not None and marg_sets is not None, \
      "Non-unique marginal variables for currently not supported: {}".\
      format(all_marg_keys)
    maybe_fasttrack = True

  # Maybe fast-track identical conditionals
  if maybe_fasttrack:
    marg_same = True
    cond_same = True
    if marg_sets is None: # no need to recheck if not None (I think)
      marg_same = True
      for name in marg_names[1:]:
        if marg_names[0] != name:
          marg_same = False
          break
      # Conditionals count as the same if there are none at all
      cond_same = not any(cond_names)
      if not cond_same:
        cond_same = True
        for name in cond_names[1:]:
          if cond_names[0] != name:
            cond_same = False
            break
    if marg_same and cond_same:
      marg_names = marg_names[0]
      cond_names = cond_names[0]
      prod_marg_name = ','.join(marg_names)
      prod_cond_name = ','.join(cond_names)
      prod_name = '|'.join([prod_marg_name, prod_cond_name])
      prod_vals = collections.OrderedDict()
      for i, val in enumerate(vals):
        areunitsetints = np.array([isunitsetint(_val) 
                                   for _val in val.values()])
        if not np.any(areunitsetints):
          prod_vals.update(val)
        else:
          assert marg_sets is not None, "Variable mismatch"
          assert np.all(marg_sets == areunitsetints[:len(marg_sets)]), \
              "Variable mismatch"
          if not len(prod_vals):
            prod_vals.update(collections.OrderedDict(val))
          else:
            # Add up the integers held in the unit sets across distributions
            for j, key in enumerate(prod_vals.keys()):
              if areunitsetints[j]:
                prod_vals.update({key: {list(prod_vals[key])[0] + \
                                        list(val[key])[0]}})
      if marg_sets is not None:
        prob, pscale = prod_rule(*tuple(probs), pscales=pscales, pscale=pscale)
        return PD(prod_name, prod_vals, dims=args[0].dims, prob=prob, pscale=pscale)
      else:
        # Probabilities are summed when iscomplex(pscale) holds and multiplied
        # otherwise (presumably a complex pscale denotes a log scale - confirm)
        prod_prob = float(sum(probs)) if iscomplex(pscale) else float(np.prod(probs))
        return PD(prod_name, prod_vals, prob=prod_prob, pscale=pscale)

  # Check cond->marg accounts for all differences between conditionals
  prod_marg = [name for dist_marg_names in marg_names \
                          for name in dist_marg_names]
  prod_marg_name = ','.join(prod_marg)
  flat_cond_names = [name for dist_cond_names in cond_names \
                          for name in dist_cond_names]
  # Conditionals that appear as a marginal elsewhere leave the product's
  # conditional list
  cond2marg = [cond_name for cond_name in flat_cond_names \
                         if cond_name in prod_marg]
  prod_cond = [cond_name for cond_name in flat_cond_names \
                         if cond_name not in cond2marg]
  cond2marg_set = set(cond2marg)

  # Check conditionals compatible
  prod_cond_set = set(prod_cond)
  cond2marg_dict = {name: None for name in cond2marg}
  for i, arg in enumerate(args):
    cond_set = set(cond_names[i]) - cond2marg_set
    if cond_set:
      assert prod_cond_set == cond_set, \
          "Incompatible product conditional {} for conditional set {}: ".format(
              prod_cond_set, cond_set)
    # Values of a conditional that became a marginal must agree everywhere
    for name in cond2marg:
      if name in arg.keys():
        values = arg[name]
        if not isscalar(values):
          values = np.ravel(values)
        if cond2marg_dict[name] is None:
          cond2marg_dict[name] = values
        elif not np.allclose(cond2marg_dict[name], values):
          raise ValueError("Mismatch in values for condition {}".format(name))

  # Establish product name, values, and dimensions
  prod_keys = str2key(prod_marg + prod_cond)
  prod_nkeys = len(prod_keys)
  prod_aresingleton = np.zeros(prod_nkeys, dtype=bool)
  prod_areunitsetints = np.zeros(prod_nkeys, dtype=bool)
  prod_cond_name = ','.join(prod_cond)
  prod_name = prod_marg_name if not len(prod_cond_name) \
              else '|'.join([prod_marg_name, prod_cond_name])
  prod_vals = collections.OrderedDict()
  for i, key in enumerate(prod_keys):
    # Take the values from the first distribution that contains the key
    values = None
    for val in vals:
      if key in val.keys():
        values = val[key]
        prod_areunitsetints[i] = isunitsetint(val[key])
        if prod_areunitsetints[i]:
          # Start from zero; the totals are accumulated in the pass below
          values = {0}
        break
    assert values is not None, "Values for key {} not found".format(key)
    prod_aresingleton[i] = issingleton(values)
    prod_vals.update({key: values})
  if np.any(prod_areunitsetints):
    # Add up the unit-set integers over every distribution holding the key
    for i, key in enumerate(prod_keys):
      if prod_areunitsetints[i]:
        for val in vals:
          if key in val:
            assert isunitsetint(val[key]), "Mismatch in variables {} vs {}".\
                format(prod_vals, val)
            prod_vals.update({key: {list(prod_vals[key])[0] + list(val[key])[0]}})
  # By default every non-singleton key is given a dimension of its own
  prod_newdims = np.array(np.logical_not(prod_aresingleton))
  # Dimensions are shared if a distribution maps two keys onto the same dimension
  dims_shared = False
  for arg in args:
    argdims = [dim for dim in arg.dims.values() if dim is not None]
    if len(argdims) != len(set(argdims)):
      dims_shared = True

  # Shared dimensions limit product dimensionality
  if dims_shared:
    seen_keys = set()
    for i, key in enumerate(prod_keys):
      if prod_newdims[i] and key not in seen_keys:
        for arg in args:
          if key in arg.dims:
            dim = arg.dims[key]
            seen_keys.add(key)
            # Other keys sharing this key's dimension get no new dimension
            for argkey, argdim in arg.dims.items():
              seen_keys.add(argkey)
              if argkey != key and argdim is not None:
                if dim == argdim:
                  index = prod_keys.index(argkey)
                  prod_newdims[index] = False

  # The cumulative count gives each key its one-based dimension number
  prod_cdims = np.cumsum(prod_newdims)
  prod_ndims = prod_cdims[-1]

  # Fast-track scalar products
  if maybe_fasttrack and prod_ndims == 0:
     prob = float(sum(probs)) if iscomplex(pscale) else float(np.prod(probs))
     return PD(prod_name, prod_vals, prob=prob, pscale=pscale)

  # Reshape values - they require no axes swapping
  ones_ndims = np.ones(prod_ndims, dtype=int)
  prod_shape = np.ones(prod_ndims, dtype=int)
  scalarset = set()
  prod_dims = collections.OrderedDict()
  for i, key in enumerate(prod_keys):
    if prod_aresingleton[i]:
      scalarset.add(key)
    else:
      # Place the values along their product dimension, size one elsewhere
      values = prod_vals[key]
      re_shape = np.copy(ones_ndims)
      dim = prod_cdims[i]-1
      prod_dims.update({key: dim})
      re_shape[dim] = values.size
      prod_shape[dim] = values.size
      prod_vals.update({key: values.reshape(re_shape)})
  
  # Match probability axes and shapes with axes swapping then reshaping
  for i in range(len(args)):
    prob = probs[i]
    if not isscalar(prob):
      # Map each dimension of this distribution onto its product dimension
      dims = collections.OrderedDict()
      for key, val in args[i].dims.items():
        if val is not None:
          dims.update({val: prod_dims[key]})
      old_dims = []
      new_dims = []
      for key, val in dims.items():
        if key not in old_dims:
          old_dims.append(key)
          new_dims.append(val)
      if len(old_dims) > 1 and not old_dims == new_dims:
        # Pad with trailing axes so that every destination axis exists
        max_dims_inc = max(new_dims) + 1
        while prob.ndim < max_dims_inc:
          prob = np.expand_dims(prob, -1)
        prob = np.moveaxis(prob, old_dims, new_dims)
      re_shape = np.copy(ones_ndims)
      for dim in new_dims:
        re_shape[dim] = prod_shape[dim]
      probs[i] = prob.reshape(re_shape)

  # Multiply the probabilities and output the result as a distribution instance
  prob, pscale = prod_rule(*tuple(probs), pscales=pscales, pscale=pscale)

  return PD(prod_name, prod_vals, dims=prod_dims, prob=prob, pscale=pscale)
예제 #7
0
def summate(*args):
  """ Quick and dirty concatenation of distributions sharing the same keys.

  :param args: distributions with identical pscales and identical marginal and
               conditional keys.

  :return: None if no distributions are given, otherwise a PD instance holding
           the concatenated values and probabilities.

  :raises AssertionError: if pscales or keys differ, if unit-set and specified
          values are mixed, or if no unique concatenation axis can be found.
  """
  if not len(args):
    return None
  # Imported only once needed so that an empty call returns immediately
  from probayes.pd import PD
  pscales = [arg.pscale for arg in args]
  vals = [dict(arg) for arg in args]
  probs = [arg.prob for arg in args]

  # Check pscales are the same
  pscale = pscales[0]
  for _pscale in pscales[1:]:
    assert pscale == _pscale, \
        "Cannot summate distributions with different pscales"

  # Check marginal and conditional keys
  marg_keys = list(args[0].marg.keys())
  cond_keys = list(args[0].cond.keys())
  for arg in args[1:]:
    arg_marg_keys = list(arg.marg.keys())
    arg_cond_keys = list(arg.cond.keys())
    assert marg_keys == arg_marg_keys, \
      "Marginal variable names not identical across distributions: {} vs {}".\
      format(marg_keys, arg_marg_keys)
    assert cond_keys == arg_cond_keys, \
      "Conditional variable names not identical across distributions: {} vs {}".\
      format(cond_keys, arg_cond_keys)
  sum_keys = marg_keys + cond_keys
  sum_name = ','.join(marg_keys)
  if cond_keys:
    sum_name += '|' + ','.join(cond_keys)

  # If all singleton, concatenate in dimension 0
  if all(arg.issingleton for arg in args):
    unitsets = {key: isunitsetint(args[0][key]) for key in sum_keys}
    sum_dims = {key: None if unitsets[key] else 0 for key in sum_keys}
    # Unit-set integers are added up; specified values are collected in lists
    sum_vals = {key: 0 if unitsets[key] else [] for key in sum_keys}
    sum_prob = []
    for arg in args:
      for key, val in arg.items():
        if unitsets[key]:
          assert isunitsetint(val), \
              "Cannot mix unspecified set and specified values"
          sum_vals[key] += list(val)[0]
        else:
          assert not isunitsetint(val), \
              "Cannot mix unspecified set and specified values"
          sum_vals[key].append(val)
      sum_prob.append(arg.prob)
    for key in sum_keys:
      if unitsets[key]:
        sum_vals[key] = {sum_vals[key]}
      else:
        sum_vals[key] = np.ravel(sum_vals[key])
    sum_prob = np.ravel(sum_prob)
    return PD(sum_name, sum_vals, dims=sum_dims, prob=sum_prob, pscale=pscale)

  # 2. all identical but in one dimension: concatenate in that dimension
  # TODO: the original author flagged this remaining path as unfinished; only
  # the uniqueness check below has been corrected
  sum_vals = collections.OrderedDict(args[0])
  sum_dims = [None] * (len(args) - 1)
  for i, arg in enumerate(args[1:]):
    for key in marg_keys:
      if sum_dims[i] is not None:
        break
      if not arg.singleton(key):
        key_vals = arg[key]
        if key_vals.size == sum_vals[key].size:
          if np.allclose(key_vals, sum_vals[key]):
            continue
        # First marginal whose values differ from those of the first argument
        sum_dims[i] = arg.dims[key]

  # Every later distribution must differ along one and the same dimension
  assert len(set(sum_dims)) == 1 and sum_dims[0] is not None, \
      "Cannot find unique concatenation axis"
  sum_dim = sum_dims[0]
  sum_dims = args[0].dims
  key = marg_keys[sum_dim]
  sum_prob = np.copy(probs[0])
  for i, val in enumerate(vals):
    if i == 0:
      continue
    sum_vals[key] = np.concatenate([sum_vals[key], val[key]], axis=sum_dim)
    sum_prob = np.concatenate([sum_prob, probs[i]], axis=sum_dim)
  return PD(sum_name, sum_vals, dims=sum_dims, prob=sum_prob, pscale=pscale)
예제 #8
0
파일: rv.py 프로젝트: Bhumbra/probayes
  def eval_step(self, pred_vals, succ_vals, reverse=False):
    """ Evaluates successive values from previous values with an optional
    direction reversal flag, outputting a three-length tuple that includes the
    successive values in the first argument.

    :param pred_vals: predecessor values (NumPy array), or a dictionary from
                      which the entry under this variable's name is taken.
    :param succ_vals: successor values (see step()).
    :param reverse: boolean flag (default False) to reverse direction.

    :return vals: a dictionary including both predecessor and successor values.
    :return dims: a dictionary with dimension indices for the values in vals.
    :return kwargs: a dictionary that includes optional keywords for eval_tran()
    """

    if succ_vals is None:
      assert self._tran is not None, "No transitional function specified"
    if isinstance(pred_vals, dict):
      pred_vals = pred_vals[self.name]
    kwargs = dict() # to pass over to eval_tran()
    if succ_vals is None:
      if self._delta is None:
        succ_vals = {0} if isscalar(pred_vals) else pred_vals
      else:
        delta = self.eval_delta()
        succ_vals = self.apply_delta(pred_vals, delta)

    #---------------------------------------------------------------------------
    def _reshape_vals(pred, succ):
      """ Returns (pred, succ, dims), reshaping to successor-by-predecessor
      when neither is a singleton """
      dims = {}
      ndim = 0

      # Now reshape the values according to succ > prev dimensionality
      if issingleton(succ):
        dims.update({self._name+"'": None})
      else:
        dims.update({self._name+"'": ndim})
        ndim += 1
      if issingleton(pred):
        dims.update({self._name: None})
      else:
        dims.update({self._name: ndim})
        ndim += 1

      if ndim == 2: # pred_vals distributed along inner dimension:
        pred = pred.reshape([1, pred.size])
        succ = succ.reshape([succ.size, 1])
      return pred, succ, dims

    #---------------------------------------------------------------------------
    # Scalar treatment is the most trivial and ignores reverse
    if self._tran is None or self._tran.isscalar:
      if isunitsetint(succ_vals):
        succ_vals = self.evaluate(succ_vals, use_pfun=False)[self._name]
      elif isunitsetfloat(succ_vals):
        assert self._vtype in VTYPES[float], \
            "Inverse CDF sampling for scalar probabilities unavailable for " + \
            "{} data type".format(self._vtype)
        # Interpolate linearly between the limits at the given CDF value
        cdf_val = list(succ_vals)[0]
        lo, hi = min(self._limits), max(self._limits)
        succ_val = lo*(1.-cdf_val) + hi*cdf_val
        if self._ufun is not None:
          succ_val = self.ufun[-1](succ_val)
        # Use the sampled value as the successor; previously it was computed
        # but discarded, leaving the unit set itself as the successor value
        succ_vals = succ_val

      # The result is not used in this branch; the call is retained in case
      # evaluating the transitional has side effects - TODO confirm
      prob = self._tran() if self._tran is not None else None
      pred_vals, succ_vals, dims = _reshape_vals(pred_vals, succ_vals)
                  
    # Handle discrete non-callables
    elif not self._tran.callable:
      if reverse and not self._tran.ismulti and not self.__sym_tran:
        warnings.warn("Reverse direction called from asymmetric transitional")
      prob = self._tran() if not self._tran.ismulti else \
             self._tran[int(reverse)]()
      if isunitset(succ_vals):
        succ_vals, pred_idx, succ_idx = matrix_cond_sample(pred_vals, 
                                                           succ_vals, 
                                                           prob=prob, 
                                                           vset=self._vset) 
        kwargs.update({'pred_idx': pred_idx, 'succ_idx': succ_idx})
      pred_vals, succ_vals, dims = _reshape_vals(pred_vals, succ_vals)

    # That just leaves callables
    else:
      kwds = {self._name: pred_vals}
      if isunitset(succ_vals):
        assert self._tfun is not None, \
            "Conditional sampling requires setting CDF and ICDF " + \
            "conditional functions using rv.set.tfun()"
        assert isscalar(pred_vals), \
            "Successor sampling only possible with scalar predecessors"
        succ_vals = list(succ_vals)[0]
        if type(succ_vals) in VTYPES[int] or type(succ_vals) in VTYPES[np.uint]:
          # Integer count: draw uniformly between the first tfun callable's
          # outputs at the limits
          lo, hi = min(self._ulims), max(self._ulims)
          kwds.update({self._name+"'": np.array([lo, hi], dtype=float)})
          lohi = self._tfun[0](**kwds)
          lo, hi = float(min(lohi)), float(max(lohi))
          succ_vals = uniform(lo, hi, succ_vals,
                              isinstance(self._vset[0], tuple),
                              isinstance(self._vset[1], tuple))
        else:
          succ_vals = np.atleast_1d(succ_vals)
        # Map the draws to successor values through the second tfun callable
        kwds.update({self._name: pred_vals,
                     self._name+"'": succ_vals})
        succ_vals = self._tfun[1](**kwds)
      elif not isscalar(succ_vals):
        succ_vals = np.atleast_1d(succ_vals)
      pred_vals, succ_vals, dims = _reshape_vals(pred_vals, succ_vals)

    vals = collections.OrderedDict({self._name+"'": succ_vals,
                                    self._name: pred_vals})
    kwargs.update({'reverse': reverse})
    return vals, dims, kwargs