Example #1
    def __init__(self, latent_vars=None, data=None):
        """Initialization.

    Parameters
    ----------
    latent_vars : dict, optional
      Collection of latent variables (of type ``RandomVariable`` or
      ``tf.Tensor``) to perform inference on. Each random variable is
      bound to another random variable; the latter will be used to
      infer the former, conditional on data.
    data : dict, optional
      Data dictionary which binds observed variables (of type
      ``RandomVariable`` or ``tf.Tensor``) to their realizations (of
      type ``tf.Tensor``). It can also bind placeholders (of type
      ``tf.Tensor``) used in the model to their realizations; and
      prior latent variables (of type ``RandomVariable``) to posterior
      latent variables (of type ``RandomVariable``).

    Examples
    --------
    >>> mu = Normal(loc=tf.constant(0.0), scale=tf.constant(1.0))
    >>> x = Normal(loc=tf.ones(50) * mu, scale=tf.constant(1.0))
    >>>
    >>> qmu_loc = tf.Variable(tf.random_normal([]))
    >>> qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
    >>> qmu = Normal(loc=qmu_loc, scale=qmu_scale)
    >>>
    >>> inference = ed.Inference({mu: qmu}, data={x: tf.zeros(50)})
    """
        sess = get_session()
        if latent_vars is None:
            latent_vars = {}
        if data is None:
            data = {}

        check_latent_vars(latent_vars)
        self.latent_vars = latent_vars

        check_data(data)
        self.data = {}
        for key, value in six.iteritems(data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                self.data[key] = value
            elif isinstance(key, (RandomVariable, tf.Tensor)):
                if isinstance(value, (RandomVariable, tf.Tensor)):
                    self.data[key] = value
                elif isinstance(
                        value, (float, list, int, np.ndarray, np.number, str)):
                    # If value is a Python type, store it in the graph.
                    # Assign its placeholder with the key's data type.
                    with tf.variable_scope("data"):
                        ph = tf.placeholder(key.dtype, np.shape(value))
                        var = tf.Variable(ph, trainable=False, collections=[])
                        sess.run(var.initializer, {ph: value})
                        self.data[key] = var
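
The last branch above freezes plain Python values (floats, lists, NumPy arrays) into the graph: the value is fed once through a placeholder and stored in a non-trainable variable that is kept out of the global collections. A minimal standalone sketch of that pattern, assuming TensorFlow 1.x (the session and array here are illustrative, not Edward API):

```python
import numpy as np
import tensorflow as tf

sess = tf.Session()
value = np.zeros(50, dtype=np.float32)  # a realization passed through the `data` dict

with tf.variable_scope("data"):
    # Placeholder carries the key's dtype and the value's shape; the variable
    # stores the value once and stays out of the trainable/global collections.
    ph = tf.placeholder(tf.float32, np.shape(value))
    var = tf.Variable(ph, trainable=False, collections=[])

sess.run(var.initializer, {ph: value})
print(sess.run(var).shape)  # (50,)
```
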
Example #2
    def __init__(self, latent_vars=None, data=None):
        """Create an inference algorithm.

    Args:
      latent_vars: dict, optional.
        Collection of latent variables (of type `RandomVariable` or
        `tf.Tensor`) to perform inference on. Each random variable is
        bound to another random variable; the latter will be used to
        infer the former, conditional on data.
      data: dict, optional.
        Data dictionary which binds observed variables (of type
        `RandomVariable` or `tf.Tensor`) to their realizations (of
        type `tf.Tensor`). It can also bind placeholders (of type
        `tf.Tensor`) used in the model to their realizations; and
        prior latent variables (of type `RandomVariable`) to posterior
        latent variables (of type `RandomVariable`).
    """
        sess = get_session()
        if latent_vars is None:
            latent_vars = {}
        if data is None:
            data = {}

        check_latent_vars(latent_vars)
        self.latent_vars = latent_vars

        check_data(data)
        self.data = {}
        for key, value in six.iteritems(data):
            if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
                self.data[key] = value
            elif isinstance(key, (RandomVariable, tf.Tensor)):
                if isinstance(value, (RandomVariable, tf.Tensor)):
                    self.data[key] = value
                elif isinstance(
                        value, (float, list, int, np.ndarray, np.number, str)):
                    # If value is a Python type, store it in the graph.
                    # Assign its placeholder with the key's data type.
                    with tf.variable_scope(None, default_name="data"):
                        ph = tf.placeholder(key.dtype, np.shape(value))
                        var = tf.Variable(ph, trainable=False, collections=[])
                        sess.run(var.initializer, {ph: value})
                        self.data[key] = var
Example #3
  def __init__(self, latent_vars=None, data=None):
    """Create an inference algorithm.

    Args:
      latent_vars: dict, optional.
        Collection of latent variables (of type `RandomVariable` or
        `tf.Tensor`) to perform inference on. Each random variable is
        bound to another random variable; the latter will be used to
        infer the former, conditional on data.
      data: dict, optional.
        Data dictionary which binds observed variables (of type
        `RandomVariable` or `tf.Tensor`) to their realizations (of
        type `tf.Tensor`). It can also bind placeholders (of type
        `tf.Tensor`) used in the model to their realizations; and
        prior latent variables (of type `RandomVariable`) to posterior
        latent variables (of type `RandomVariable`).
    """
    sess = get_session()
    if latent_vars is None:
      latent_vars = {}
    if data is None:
      data = {}

    check_latent_vars(latent_vars)
    self.latent_vars = latent_vars

    check_data(data)
    self.data = {}
    for key, value in six.iteritems(data):
      if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
        self.data[key] = value
      elif isinstance(key, (RandomVariable, tf.Tensor)):
        if isinstance(value, (RandomVariable, tf.Tensor)):
          self.data[key] = value
        elif isinstance(value, (float, list, int, np.ndarray, np.number, str)):
          # If value is a Python type, store it in the graph.
          # Assign its placeholder with the key's data type.
          with tf.variable_scope("data"):
            ph = tf.placeholder(key.dtype, np.shape(value))
            var = tf.Variable(ph, trainable=False, collections=[])
            sess.run(var.initializer, {ph: value})
            self.data[key] = var
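
Examples #1 and #3 enter a fixed scope with `tf.variable_scope("data")`, while Example #2 uses `tf.variable_scope(None, default_name="data")`, which uniquifies the scope name on each entry. A small sketch of the resulting variable names, assuming TensorFlow 1.x:

```python
import tensorflow as tf

g = tf.Graph()
with g.as_default():
    # default_name: repeated entry yields fresh scopes "data", "data_1", ...
    with tf.variable_scope(None, default_name="data"):
        a = tf.Variable(0.0, name="v")
    with tf.variable_scope(None, default_name="data"):
        b = tf.Variable(0.0, name="v")
    print(a.name, b.name)  # data/v:0  data_1/v:0
```
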
Example #4
def ppc(T, data, latent_vars=None, n_samples=100):
  """Posterior predictive check
  (Rubin, 1984; Meng, 1994; Gelman, Meng, and Stern, 1996).

  PPCs form an empirical distribution for the predictive discrepancy,

  $p(T\mid x) = \int p(T(x^{\\text{rep}})\mid z) p(z\mid x) dz$

  by drawing replicated data sets $x^{\\text{rep}}$ and
  calculating $T(x^{\\text{rep}})$ for each data set. This reference
  distribution is then compared to the realized discrepancy $T(x)$.

  If `data` is drawn from the prior predictive distribution, then this
  is a prior predictive check (Box, 1980).

  Args:
    T: function.
      Discrepancy function, which takes a dictionary of data and
      dictionary of latent variables as input and outputs a `tf.Tensor`.
    data: dict.
      Data to compare to. It binds observed variables (of type
      `RandomVariable` or `tf.Tensor`) to their realizations (of
      type `tf.Tensor`). It can also bind placeholders (of type
      `tf.Tensor`) used in the model to their realizations.
    latent_vars: dict, optional.
      Collection of random variables (of type `RandomVariable` or
      `tf.Tensor`) bound to their inferred posterior. This argument
      is used when the discrepancy is a function of latent variables.
    n_samples: int, optional.
      Number of replicated data sets.

  Returns:
    list of np.ndarray.
    List containing the reference distribution, which is a NumPy array
    with `n_samples` elements,

    $(T(x^{\\text{rep},1}, z^{1}), ...,
       T(x^{\\text{rep},n_{\\text{samples}}}, z^{n_{\\text{samples}}}))$

    and the realized discrepancy, which is a NumPy array with
    `n_samples` elements,

    $(T(x, z^{1}), ..., T(x, z^{n_{\\text{samples}}})).$


  #### Examples

  ```python
  # build posterior predictive after inference:
  # it is parameterized by a posterior sample
  x_post = ed.copy(x, {z: qz, beta: qbeta})

  # posterior predictive check
  # T is a user-defined function of data, T(data)
  T = lambda xs, zs: tf.reduce_mean(xs[x_post])
  ed.ppc(T, data={x_post: x_train})

  # in general T is a discrepancy function of the data (both response and
  # covariates) and latent variables, T(data, latent_vars)
  T = lambda xs, zs: tf.reduce_mean(zs[z])
  ed.ppc(T, data={y_post: y_train, x_ph: x_train},
         latent_vars={z: qz, beta: qbeta})

  # prior predictive check
  # run ppc on original x
  ed.ppc(T, data={x: x_train})
  ```
  """
  sess = get_session()
  if not callable(T):
    raise TypeError("T must be a callable function.")

  check_data(data)
  if latent_vars is None:
    latent_vars = {}

  check_latent_vars(latent_vars)
  if not isinstance(n_samples, int):
    raise TypeError("n_samples must have type int.")

  # Build replicated latent variables.
  zrep = {key: tf.convert_to_tensor(value)
          for key, value in six.iteritems(latent_vars)}

  # Build replicated data.
  xrep = {x: (x.value() if isinstance(x, RandomVariable) else obs)
          for x, obs in six.iteritems(data)}

  # Create feed_dict for data placeholders that the model conditions
  # on; it is necessary for all session runs.
  feed_dict = {key: value for key, value in six.iteritems(data)
               if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type}

  # Calculate discrepancy over many replicated data sets and latent
  # variables.
  Trep = T(xrep, zrep)
  Tobs = T(data, zrep)
  Treps = []
  Ts = []
  for _ in range(n_samples):
    # Take a forward pass (session run) to get new samples for
    # each calculation of the discrepancy.
    # Alternatively, we could unroll the graph by registering this
    # operation `n_samples` times, each for different parent nodes
    # representing `xrep` and `zrep`. But it's expensive.
    Treps += [sess.run(Trep, feed_dict)]
    Ts += [sess.run(Tobs, feed_dict)]

  return [np.stack(Treps), np.stack(Ts)]
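
A common follow-up to `ppc` is to reduce the two returned arrays to a posterior predictive p-value, the fraction of replicated discrepancies that are at least as extreme as the realized ones. A hedged NumPy sketch (the random arrays below merely stand in for actual `ed.ppc` output):

```python
import numpy as np

# Stand-ins for ed.ppc's return value:
# Treps[i] = T(x_rep_i, z_i), Ts[i] = T(x, z_i) for posterior draws z_i.
Treps = np.random.normal(loc=0.1, scale=1.0, size=100)
Ts = np.random.normal(loc=0.0, scale=1.0, size=100)

ppc_pvalue = np.mean(Treps >= Ts)
print("posterior predictive p-value: %.3f" % ppc_pvalue)
```
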
Example #5
def evaluate(metrics, data, n_samples=500, output_key=None):
    """Evaluate fitted model using a set of metrics.

  A metric, or scoring rule (Winkler, 1994), is a function of observed
  data under the posterior predictive distribution. For example, in
  supervised metrics such as classification accuracy, the observed
  data (true output) is compared to the posterior predictive's mean
  (predicted output). In unsupervised metrics such as log-likelihood,
  the probability of observing the data is calculated under the
  posterior predictive's log-density.

  Parameters
  ----------
  metrics : list of str or str
    List of metrics or a single metric:
    ``'binary_accuracy'``,
    ``'categorical_accuracy'``,
    ``'sparse_categorical_accuracy'``,
    ``'log_loss'`` or ``'binary_crossentropy'``,
    ``'categorical_crossentropy'``,
    ``'sparse_categorical_crossentropy'``,
    ``'hinge'``,
    ``'squared_hinge'``,
    ``'mse'`` or ``'MSE'`` or ``'mean_squared_error'``,
    ``'mae'`` or ``'MAE'`` or ``'mean_absolute_error'``,
    ``'mape'`` or ``'MAPE'`` or ``'mean_absolute_percentage_error'``,
    ``'msle'`` or ``'MSLE'`` or ``'mean_squared_logarithmic_error'``,
    ``'poisson'``,
    ``'cosine'`` or ``'cosine_proximity'``,
    ``'log_lik'`` or ``'log_likelihood'``.
  data : dict
    Data to evaluate model with. It binds observed variables (of type
    ``RandomVariable`` or ``tf.Tensor``) to their realizations (of
    type ``tf.Tensor``). It can also bind placeholders (of type
    ``tf.Tensor``) used in the model to their realizations.
  n_samples : int, optional
    Number of posterior samples for making predictions, using the
    posterior predictive distribution.
  output_key : RandomVariable or tf.Tensor, optional
    It is the key in ``data`` which corresponds to the model's output.

  Returns
  -------
  list of float or float
    A list of evaluations or a single evaluation.

  Raises
  ------
  NotImplementedError
    If an input metric does not match an implemented metric in Edward.

  Examples
  --------
  >>> # build posterior predictive after inference: it is
  >>> # parameterized by a posterior sample
  >>> x_post = ed.copy(x, {z: qz, beta: qbeta})
  >>>
  >>> # log-likelihood performance
  >>> ed.evaluate('log_likelihood', data={x_post: x_train})
  >>>
  >>> # classification accuracy
  >>> # here, ``x_ph`` is any features the model is defined with respect to,
  >>> # and ``y_post`` is the posterior predictive distribution
  >>> ed.evaluate('binary_accuracy', data={y_post: y_train, x_ph: x_train})
  >>>
  >>> # mean squared error
  >>> ed.evaluate('mean_squared_error', data={y: y_data, x: x_data})
  """
    sess = get_session()
    if isinstance(metrics, str):
        metrics = [metrics]
    elif not isinstance(metrics, list):
        raise TypeError("metrics must have type str or list.")

    check_data(data)
    if not isinstance(n_samples, int):
        raise TypeError("n_samples must have type int.")

    if output_key is None:
        # Default output_key to the only data key that isn't a placeholder.
        keys = [
            key for key in six.iterkeys(data) if
            not isinstance(key, tf.Tensor) or "Placeholder" not in key.op.type
        ]
        if len(keys) == 1:
            output_key = keys[0]
        else:
            raise KeyError("User must specify output_key.")
    elif not isinstance(output_key, RandomVariable):
        raise TypeError("output_key must have type RandomVariable.")

    # Create feed_dict for data placeholders that the model conditions
    # on; it is necessary for all session runs.
    feed_dict = {
        key: value
        for key, value in six.iteritems(data)
        if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type
    }

    # Form true data.
    y_true = data[output_key]
    # Make predictions (if there are any supervised metrics).
    if metrics != ['log_lik'] and metrics != ['log_likelihood']:
        binary_discrete = (Bernoulli, Binomial)
        categorical_discrete = (Categorical, Multinomial, OneHotCategorical)
        if isinstance(output_key, binary_discrete + categorical_discrete):
            # Average over realizations of their probabilities, then predict
            # via argmax over probabilities.
            probs = [
                sess.run(output_key.probs, feed_dict) for _ in range(n_samples)
            ]
            probs = tf.add_n(probs) / tf.cast(n_samples, tf.float32)
            if isinstance(output_key, binary_discrete):
                # make random prediction whenever probs is exactly 0.5
                random = tf.random_uniform(shape=tf.shape(probs))
                y_pred = tf.round(tf.where(tf.equal(0.5, probs), random,
                                           probs))
            else:
                y_pred = tf.argmax(probs, len(probs.shape) - 1)
        else:
            # Monte Carlo estimate the mean of the posterior predictive.
            y_pred = [
                sess.run(output_key, feed_dict) for _ in range(n_samples)
            ]
            y_pred = tf.cast(tf.add_n(y_pred), tf.float32) / \
                tf.cast(n_samples, tf.float32)

    # Evaluate y_true (according to y_pred if supervised) for all metrics.
    evaluations = []
    for metric in metrics:
        if metric == 'accuracy' or metric == 'crossentropy':
            # automate binary or sparse cat depending on its support
            support = sess.run(tf.reduce_max(y_true), feed_dict)
            if support <= 1:
                metric = 'binary_' + metric
            else:
                metric = 'sparse_categorical_' + metric

        if metric == 'binary_accuracy':
            evaluations += [binary_accuracy(y_true, y_pred)]
        elif metric == 'categorical_accuracy':
            evaluations += [categorical_accuracy(y_true, y_pred)]
        elif metric == 'sparse_categorical_accuracy':
            evaluations += [sparse_categorical_accuracy(y_true, y_pred)]
        elif metric == 'log_loss' or metric == 'binary_crossentropy':
            evaluations += [binary_crossentropy(y_true, y_pred)]
        elif metric == 'categorical_crossentropy':
            evaluations += [categorical_crossentropy(y_true, y_pred)]
        elif metric == 'sparse_categorical_crossentropy':
            evaluations += [sparse_categorical_crossentropy(y_true, y_pred)]
        elif metric == 'hinge':
            evaluations += [hinge(y_true, y_pred)]
        elif metric == 'squared_hinge':
            evaluations += [squared_hinge(y_true, y_pred)]
        elif (metric == 'mse' or metric == 'MSE'
              or metric == 'mean_squared_error'):
            evaluations += [mean_squared_error(y_true, y_pred)]
        elif (metric == 'mae' or metric == 'MAE'
              or metric == 'mean_absolute_error'):
            evaluations += [mean_absolute_error(y_true, y_pred)]
        elif (metric == 'mape' or metric == 'MAPE'
              or metric == 'mean_absolute_percentage_error'):
            evaluations += [mean_absolute_percentage_error(y_true, y_pred)]
        elif (metric == 'msle' or metric == 'MSLE'
              or metric == 'mean_squared_logarithmic_error'):
            evaluations += [mean_squared_logarithmic_error(y_true, y_pred)]
        elif metric == 'poisson':
            evaluations += [poisson(y_true, y_pred)]
        elif metric == 'cosine' or metric == 'cosine_proximity':
            evaluations += [cosine_proximity(y_true, y_pred)]
        elif metric == 'log_lik' or metric == 'log_likelihood':
            # Monte Carlo estimate the log-density of the posterior predictive.
            tensor = tf.reduce_mean(output_key.log_prob(y_true))
            log_pred = [sess.run(tensor, feed_dict) for _ in range(n_samples)]
            log_pred = tf.add_n(log_pred) / tf.cast(n_samples, tf.float32)
            evaluations += [log_pred]
        else:
            raise NotImplementedError(
                "Metric is not implemented: {}".format(metric))

    if len(evaluations) == 1:
        return sess.run(evaluations[0], feed_dict)
    else:
        return sess.run(evaluations, feed_dict)
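
For non-discrete outputs, the prediction step in `evaluate` is a plain Monte Carlo mean over `n_samples` forward passes of the posterior predictive. A standalone NumPy sketch of that estimator (the Normal draws below stand in for session runs of `output_key`):

```python
import numpy as np

n_samples = 500
# Each draw stands in for one sess.run(output_key, feed_dict).
draws = [np.random.normal(loc=2.0, scale=1.0, size=50) for _ in range(n_samples)]
y_pred = np.sum(draws, axis=0) / float(n_samples)  # Monte Carlo posterior predictive mean
print(round(float(y_pred.mean()), 2))  # close to 2.0
```
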
Example #6
def ppc(T, data, latent_vars=None, n_samples=100):
    """Posterior predictive check
  [@rubin1984bayesianly; @meng1994posterior; @gelman1996posterior].

  PPCs form an empirical distribution for the predictive discrepancy,

  $p(T\mid x) = \int p(T(x^{\\text{rep}})\mid z) p(z\mid x) dz$

  by drawing replicated data sets $x^{\\text{rep}}$ and
  calculating $T(x^{\\text{rep}})$ for each data set. This reference
  distribution is then compared to the realized discrepancy $T(x)$.

  If `data` is drawn from the prior predictive distribution, then this
  is a prior predictive check [@box1980sampling].

  Args:
    T: function.
      Discrepancy function, which takes a dictionary of data and
      dictionary of latent variables as input and outputs a `tf.Tensor`.
    data: dict.
      Data to compare to. It binds observed variables (of type
      `RandomVariable` or `tf.Tensor`) to their realizations (of
      type `tf.Tensor`). It can also bind placeholders (of type
      `tf.Tensor`) used in the model to their realizations.
    latent_vars: dict, optional.
      Collection of random variables (of type `RandomVariable` or
      `tf.Tensor`) bound to their inferred posterior. This argument
      is used when the discrepancy is a function of latent variables.
    n_samples: int, optional.
      Number of replicated data sets.

  Returns:
    list of np.ndarray.
    List containing the reference distribution, which is a NumPy array
    with `n_samples` elements,

    $(T(x^{\\text{rep},1}, z^{1}), ...,
       T(x^{\\text{rep},n_{\\text{samples}}}, z^{n_{\\text{samples}}}))$

    and the realized discrepancy, which is a NumPy array with
    `n_samples` elements,

    $(T(x, z^{1}), ..., T(x, z^{n_{\\text{samples}}})).$


  #### Examples

  ```python
  # build posterior predictive after inference:
  # it is parameterized by a posterior sample
  x_post = ed.copy(x, {z: qz, beta: qbeta})

  # posterior predictive check
  # T is a user-defined function of data, T(data)
  T = lambda xs, zs: tf.reduce_mean(xs[x_post])
  ed.ppc(T, data={x_post: x_train})

  # in general T is a discrepancy function of the data (both response and
  # covariates) and latent variables, T(data, latent_vars)
  T = lambda xs, zs: tf.reduce_mean(zs[z])
  ed.ppc(T, data={y_post: y_train, x_ph: x_train},
         latent_vars={z: qz, beta: qbeta})

  # prior predictive check
  # run ppc on original x
  ed.ppc(T, data={x: x_train})
  ```
  """
    sess = get_session()
    if not callable(T):
        raise TypeError("T must be a callable function.")

    check_data(data)
    if latent_vars is None:
        latent_vars = {}

    check_latent_vars(latent_vars)
    if not isinstance(n_samples, int):
        raise TypeError("n_samples must have type int.")

    # Build replicated latent variables.
    zrep = {
        key: tf.convert_to_tensor(value)
        for key, value in six.iteritems(latent_vars)
    }

    # Build replicated data.
    xrep = {
        x: (x.value() if isinstance(x, RandomVariable) else obs)
        for x, obs in six.iteritems(data)
    }

    # Create feed_dict for data placeholders that the model conditions
    # on; it is necessary for all session runs.
    feed_dict = {
        key: value
        for key, value in six.iteritems(data)
        if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type
    }

    # Calculate discrepancy over many replicated data sets and latent
    # variables.
    Trep = T(xrep, zrep)
    Tobs = T(data, zrep)
    Treps = []
    Ts = []
    for _ in range(n_samples):
        # Take a forward pass (session run) to get new samples for
        # each calculation of the discrepancy.
        # Alternatively, we could unroll the graph by registering this
        # operation `n_samples` times, each for different parent nodes
        # representing `xrep` and `zrep`. But it's expensive.
        Treps += [sess.run(Trep, feed_dict)]
        Ts += [sess.run(Tobs, feed_dict)]

    return [np.stack(Treps), np.stack(Ts)]
Example #7
def evaluate(metrics, data, n_samples=500, output_key=None, seed=None):
    """Evaluate fitted model using a set of metrics.

  A metric, or scoring rule [@winkler1994evaluating], is a function of
  observed data under the posterior predictive distribution. For
  example, in supervised metrics such as classification accuracy, the
  observed data (true output) is compared to the posterior
  predictive's mean (predicted output). In unsupervised metrics such
  as log-likelihood, the probability of observing the data is
  calculated under the posterior predictive's log-density.

  Args:
    metrics: str, (str, params: dict) tuple, callable, or a list of these.
      List of metrics or a single metric:
      `'binary_accuracy'`,
      `'categorical_accuracy'`,
      `'sparse_categorical_accuracy'`,
      `'log_loss'` or `'binary_crossentropy'`,
      `'categorical_crossentropy'`,
      `'sparse_categorical_crossentropy'`,
      `'hinge'`,
      `'squared_hinge'`,
      `'mse'` or `'MSE'` or `'mean_squared_error'`,
      `'mae'` or `'MAE'` or `'mean_absolute_error'`,
      `'mape'` or `'MAPE'` or `'mean_absolute_percentage_error'`,
      `'msle'` or `'MSLE'` or `'mean_squared_logarithmic_error'`,
      `'poisson'`,
      `'cosine'` or `'cosine_proximity'`,
      `'crps'` or `'continuous_ranked_probability_score'`,
      `'log_lik'` or `'log_likelihood'`.
      In lieu of a metric string, this method also accepts (str, params: dict)
      tuples; the first element of this tuple is the metric string, and
      the second is a dict of associated params. At present, this dict only
      expects one key, `'average'`, which stipulates the type of averaging to
      perform on those metrics that permit binary averaging. Permissible
      options include: `None`, `'macro'` and `'micro'`.
    data: dict.
      Data to evaluate model with. It binds observed variables (of type
      `RandomVariable` or `tf.Tensor`) to their realizations (of
      type `tf.Tensor`). It can also bind placeholders (of type
      `tf.Tensor`) used in the model to their realizations.
    n_samples: int.
      Number of posterior samples for making predictions, using the
      posterior predictive distribution.
    output_key: RandomVariable or tf.Tensor.
      It is the key in `data` which corresponds to the model's output.
    seed: a Python integer.
      Used to create a random seed for the distribution.

  Returns:
    list of float or float.
    A list of evaluations or a single evaluation.

  Raises:
    NotImplementedError.
    If an input metric does not match an implemented metric in Edward.

  #### Examples

  ```python
  # build posterior predictive after inference: it is
  # parameterized by a posterior sample
  x_post = ed.copy(x, {z: qz, beta: qbeta})

  # log-likelihood performance
  ed.evaluate('log_likelihood', data={x_post: x_train})

  # classification accuracy
  # here, `x_ph` is any features the model is defined with respect to,
  # and `y_post` is the posterior predictive distribution
  ed.evaluate('binary_accuracy', data={y_post: y_train, x_ph: x_train})

  # mean squared error
  ed.evaluate('mean_squared_error', data={y: y_data, x: x_data})

  # mean squared logarithmic error with `'micro'` averaging
  ed.evaluate(('mean_squared_logarithmic_error', {'average': 'micro'}),
              data={y: y_data, x: x_data})
  ```
  """
    sess = get_session()
    if isinstance(metrics, str):
        metrics = [metrics]
    elif callable(metrics):
        metrics = [metrics]
    elif not isinstance(metrics, list):
        raise TypeError("metrics must have type str or list, or be callable.")

    check_data(data)
    if not isinstance(n_samples, int):
        raise TypeError("n_samples must have type int.")

    if output_key is None:
        # Default output_key to the only data key that isn't a placeholder.
        keys = [
            key for key in six.iterkeys(data) if
            not isinstance(key, tf.Tensor) or "Placeholder" not in key.op.type
        ]
        if len(keys) == 1:
            output_key = keys[0]
        else:
            raise KeyError("User must specify output_key.")
    elif not isinstance(output_key, RandomVariable):
        raise TypeError("output_key must have type RandomVariable.")

    # Create feed_dict for data placeholders that the model conditions
    # on; it is necessary for all session runs.
    feed_dict = {
        key: value
        for key, value in six.iteritems(data)
        if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type
    }

    # Form true data.
    y_true = data[output_key]
    # Make predictions (if there are any supervised metrics).
    if metrics != ['log_lik'] and metrics != ['log_likelihood']:
        binary_discrete = (Bernoulli, Binomial)
        categorical_discrete = (Categorical, Multinomial, OneHotCategorical)
        total_count = sess.run(
            getattr(output_key, 'total_count', tf.constant(1.)))
        if isinstance(output_key, binary_discrete + categorical_discrete):
            # Average over realizations of their probabilities, then predict
            # via argmax over probabilities.
            probs = [
                sess.run(output_key.probs, feed_dict) for _ in range(n_samples)
            ]
            probs = np.sum(probs, axis=0) / n_samples
            if isinstance(output_key, binary_discrete):
                # make random prediction whenever probs is exactly 0.5
                random = tf.random_uniform(shape=tf.shape(probs))
                y_pred = tf.round(tf.where(tf.equal(0.5, probs), random,
                                           probs))
            else:
                if total_count > 1:
                    mode = compute_multinomial_mode(probs, total_count, seed)
                    if len(output_key.sample_shape):
                        y_pred = tf.reshape(
                            tf.tile(mode, output_key.sample_shape),
                            [-1, len(probs)])
                    else:
                        y_pred = mode
                else:
                    y_pred = tf.argmax(probs, len(probs.shape) - 1)
            probs = tf.constant(probs)
        else:
            # Monte Carlo estimate the mean of the posterior predictive.
            y_pred = [
                sess.run(output_key, feed_dict) for _ in range(n_samples)
            ]
            y_pred = tf.cast(tf.add_n(y_pred), y_pred[0].dtype) / \
                tf.cast(n_samples, y_pred[0].dtype)
        if len(y_true.shape) == 0:
            y_true = tf.expand_dims(y_true, 0)
            y_pred = tf.expand_dims(y_pred, 0)

    # Evaluate y_true (according to y_pred if supervised) for all metrics.
    evaluations = []
    for metric in metrics:
        if isinstance(metric, tuple):
            metric, params = metric
        else:
            params = {}
        if metric == 'accuracy' or metric == 'crossentropy':
            # automate binary or sparse cat depending on its support
            support = sess.run(tf.reduce_max(y_true), feed_dict)
            if support <= 1:
                metric = 'binary_' + metric
            else:
                metric = 'sparse_categorical_' + metric

        if metric == 'binary_accuracy':
            evaluations += [binary_accuracy(y_true, y_pred, **params)]
        elif metric == 'categorical_accuracy':
            evaluations += [categorical_accuracy(y_true, y_pred, **params)]
        elif metric == 'sparse_categorical_accuracy':
            evaluations += [
                sparse_categorical_accuracy(y_true, y_pred, **params)
            ]
        elif metric == 'log_loss' or metric == 'binary_crossentropy':
            evaluations += [binary_crossentropy(y_true, y_pred, **params)]
        elif metric == 'categorical_crossentropy':
            evaluations += [categorical_crossentropy(y_true, y_pred, **params)]
        elif metric == 'sparse_categorical_crossentropy':
            evaluations += [
                sparse_categorical_crossentropy(y_true, y_pred, **params)
            ]
        elif metric == 'multinomial_accuracy':
            evaluations += [multinomial_accuracy(y_true, y_pred, **params)]
        elif metric == 'kl_divergence':
            y_true_ = y_true / total_count
            y_pred_ = probs
            evaluations += [kl_divergence(y_true_, y_pred_, **params)]
        elif metric == 'hinge':
            evaluations += [hinge(y_true, y_pred, **params)]
        elif metric == 'squared_hinge':
            evaluations += [squared_hinge(y_true, y_pred, **params)]
        elif (metric == 'mse' or metric == 'MSE'
              or metric == 'mean_squared_error'):
            evaluations += [mean_squared_error(y_true, y_pred, **params)]
        elif (metric == 'mae' or metric == 'MAE'
              or metric == 'mean_absolute_error'):
            evaluations += [mean_absolute_error(y_true, y_pred, **params)]
        elif (metric == 'mape' or metric == 'MAPE'
              or metric == 'mean_absolute_percentage_error'):
            evaluations += [
                mean_absolute_percentage_error(y_true, y_pred, **params)
            ]
        elif (metric == 'msle' or metric == 'MSLE'
              or metric == 'mean_squared_logarithmic_error'):
            evaluations += [
                mean_squared_logarithmic_error(y_true, y_pred, **params)
            ]
        elif metric == 'poisson':
            evaluations += [poisson(y_true, y_pred, **params)]
        elif metric == 'cosine' or metric == 'cosine_proximity':
            evaluations += [cosine_proximity(y_true, y_pred, **params)]
        elif metric == 'crps' or metric == 'continuous_ranked_probability_score':
            evaluations += [
                continuous_ranked_probability_score(y_true, y_pred, **params)
            ]
        elif metric == 'log_lik' or metric == 'log_likelihood':
            # Monte Carlo estimate the log-density of the posterior predictive.
            tensor = tf.reduce_mean(output_key.log_prob(y_true))
            log_pred = [sess.run(tensor, feed_dict) for _ in range(n_samples)]
            log_pred = tf.add_n(log_pred) / tf.cast(n_samples, tensor.dtype)
            evaluations += [log_pred]
        elif callable(metric):
            evaluations += [metric(y_true, y_pred, **params)]
        else:
            raise NotImplementedError(
                "Metric is not implemented: {}".format(metric))

    if len(evaluations) == 1:
        return sess.run(evaluations[0], feed_dict)
    else:
        return sess.run(evaluations, feed_dict)
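
One detail worth calling out in the binary branch above: ties at a probability of exactly 0.5 are broken by sampling rather than by rounding. A small TensorFlow 1.x sketch of just that rule (the probabilities are made up):

```python
import tensorflow as tf

probs = tf.constant([0.2, 0.5, 0.9])
random = tf.random_uniform(shape=tf.shape(probs))
# Where probs == 0.5, round a uniform draw (a fair coin flip); elsewhere round probs.
y_pred = tf.round(tf.where(tf.equal(0.5, probs), random, probs))

with tf.Session() as sess:
    print(sess.run(y_pred))  # e.g. [0., 1., 1.] or [0., 0., 1.]
```
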
Example #8

def get_model_probs(data, metrics='probs', n_samples=500):
    """Return the Monte Carlo average of the output variable's class
    probabilities over `n_samples` posterior predictive draws; a helper
    that mirrors the prediction step of `evaluate` (assumes a discrete
    output variable)."""
    output_key = None
    # seed = None
    sess = get_session()
    if isinstance(metrics, str):
        metrics = [metrics]
    elif not isinstance(metrics, list):
        raise TypeError("metrics must have type str or list.")

    check_data(data)
    if not isinstance(n_samples, int):
        raise TypeError("n_samples must have type int.")

    if output_key is None:
        # Default output_key to the only data key that isn't a placeholder.
        keys = [key for key in six.iterkeys(data) if not
                isinstance(key, tf.Tensor) or "Placeholder" not in key.op.type]
        if len(keys) == 1:
            output_key = keys[0]
        else:
            raise KeyError("User must specify output_key.")
    elif not isinstance(output_key, RandomVariable):
        raise TypeError("output_key must have type RandomVariable.")

    # Create feed_dict for data placeholders that the model conditions
    # on; it is necessary for all session runs.
    feed_dict = {key: value for key, value in six.iteritems(data)
                 if
                 isinstance(key, tf.Tensor) and "Placeholder" in key.op.type}

    # Form true data.
    y_true = data[output_key]
    # Make predictions (if there are any supervised metrics).
    if metrics != ['log_lik'] and metrics != ['log_likelihood']:
        binary_discrete = (Bernoulli, Binomial)
        categorical_discrete = (Categorical, Multinomial, OneHotCategorical)
        total_count = sess.run(
            getattr(output_key, 'total_count', tf.constant(1.)))
        if isinstance(output_key, binary_discrete + categorical_discrete):
            # Average over realizations of their probabilities, then predict
            # via argmax over probabilities.
            probs = [sess.run(output_key.probs, feed_dict) for _ in
                     range(n_samples)]
            probs = np.sum(probs, axis=0) / n_samples
            if isinstance(output_key, binary_discrete):
                # make random prediction whenever probs is exactly 0.5
                random = tf.random_uniform(shape=tf.shape(probs))
                y_pred = tf.round(
                    tf.where(tf.equal(0.5, probs), random, probs))
            else:
                if total_count > 1:
                    # Multinomial mode prediction (total_count > 1) is handled
                    # in `evaluate` above but not in this helper.
                    raise NotImplementedError(
                        "get_model_probs does not support multinomial outputs "
                        "with total_count > 1.")
                else:
                    y_pred = tf.argmax(probs, len(probs.shape) - 1)
            probs = tf.constant(probs)
        else:
            # Monte Carlo estimate the mean of the posterior predictive.
            y_pred = [sess.run(output_key, feed_dict) for _ in
                      range(n_samples)]
            y_pred = tf.cast(tf.add_n(y_pred), y_pred[0].dtype) / \
                tf.cast(n_samples, y_pred[0].dtype)
        if len(y_true.shape) == 0:
            y_true = tf.expand_dims(y_true, 0)
            y_pred = tf.expand_dims(y_pred, 0)
    return probs
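
What `get_model_probs` returns is simply the sample average of `output_key.probs` over `n_samples` forward passes. A NumPy sketch of that reduction (the Dirichlet draws below stand in for session runs of the probabilities):

```python
import numpy as np

n_samples = 500
# Each row stands in for one sess.run(output_key.probs, feed_dict) over 3 classes.
draws = np.random.dirichlet([2.0, 3.0, 5.0], size=n_samples)
probs = np.sum(draws, axis=0) / n_samples  # averaged class probabilities
print(probs, probs.sum())  # roughly [0.2, 0.3, 0.5], summing to ~1
```
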
Example #9

    def test(self):
        with self.test_session():
            x = Normal(0.0, 1.0)
            qx = Normal(0.0, 1.0)
            x_ph = tf.placeholder(tf.float32, [])

            check_data({x: tf.constant(0.0)})
            check_data({x: np.float64(0.0)})
            check_data({x: np.int64(0)})
            check_data({x: 0.0})
            check_data({x: 0})
            check_data({x: False})
            check_data({x: '0'})
            check_data({x: x_ph})
            check_data({x: qx})
            check_data({2.0 * x: tf.constant(0.0)})
            self.assertRaises(TypeError, check_data, {0.0: x})
            self.assertRaises(TypeError, check_data, {x: tf.zeros(5)})
            self.assertRaises(TypeError, check_data, {x_ph: x})
            self.assertRaises(TypeError, check_data, {x_ph: x})
            self.assertRaises(TypeError, check_data,
                              {x: tf.constant(0, tf.float64)})
            self.assertRaises(TypeError, check_data, {x_ph: tf.constant(0.0)})

            x_vec = Normal(tf.constant([0.0]), tf.constant([1.0]))
            qx_vec = Normal(tf.constant([0.0]), tf.constant([1.0]))

            check_data({x_vec: qx_vec})
            check_data({x_vec: [0.0]})
            check_data({x_vec: [0]})
            check_data({x_vec: ['0']})
            self.assertRaises(TypeError, check_data, {x: qx_vec})
Example #10
def evaluate(metrics, data, n_samples=500, output_key=None):
  """Evaluate fitted model using a set of metrics.

  A metric, or scoring rule (Winkler, 1994), is a function of observed
  data under the posterior predictive distribution. For example, in
  supervised metrics such as classification accuracy, the observed
  data (true output) is compared to the posterior predictive's mean
  (predicted output). In unsupervised metrics such as log-likelihood,
  the probability of observing the data is calculated under the
  posterior predictive's log-density.

  Args:
    metrics: list of str or str.
      List of metrics or a single metric:
      `'binary_accuracy'`,
      `'categorical_accuracy'`,
      `'sparse_categorical_accuracy'`,
      `'log_loss'` or `'binary_crossentropy'`,
      `'categorical_crossentropy'`,
      `'sparse_categorical_crossentropy'`,
      `'hinge'`,
      `'squared_hinge'`,
      `'mse'` or `'MSE'` or `'mean_squared_error'`,
      `'mae'` or `'MAE'` or `'mean_absolute_error'`,
      `'mape'` or `'MAPE'` or `'mean_absolute_percentage_error'`,
      `'msle'` or `'MSLE'` or `'mean_squared_logarithmic_error'`,
      `'poisson'`,
      `'cosine'` or `'cosine_proximity'`,
      `'log_lik'` or `'log_likelihood'`.
    data: dict.
      Data to evaluate model with. It binds observed variables (of type
      `RandomVariable` or `tf.Tensor`) to their realizations (of
      type `tf.Tensor`). It can also bind placeholders (of type
      `tf.Tensor`) used in the model to their realizations.
    n_samples: int, optional.
      Number of posterior samples for making predictions, using the
      posterior predictive distribution.
    output_key: RandomVariable or tf.Tensor, optional.
      It is the key in `data` which corresponds to the model's output.

  Returns:
    list of float or float.
    A list of evaluations or a single evaluation.

  Raises:
    NotImplementedError.
    If an input metric does not match an implemented metric in Edward.

  #### Examples

  ```python
  # build posterior predictive after inference: it is
  # parameterized by a posterior sample
  x_post = ed.copy(x, {z: qz, beta: qbeta})

  # log-likelihood performance
  ed.evaluate('log_likelihood', data={x_post: x_train})

  # classification accuracy
  # here, `x_ph` is any features the model is defined with respect to,
  # and `y_post` is the posterior predictive distribution
  ed.evaluate('binary_accuracy', data={y_post: y_train, x_ph: x_train})

  # mean squared error
  ed.evaluate('mean_squared_error', data={y: y_data, x: x_data})
  ```
  """
  sess = get_session()
  if isinstance(metrics, str):
    metrics = [metrics]
  elif not isinstance(metrics, list):
    raise TypeError("metrics must have type str or list.")

  check_data(data)
  if not isinstance(n_samples, int):
    raise TypeError("n_samples must have type int.")

  if output_key is None:
    # Default output_key to the only data key that isn't a placeholder.
    keys = [key for key in six.iterkeys(data) if not
            isinstance(key, tf.Tensor) or "Placeholder" not in key.op.type]
    if len(keys) == 1:
      output_key = keys[0]
    else:
      raise KeyError("User must specify output_key.")
  elif not isinstance(output_key, RandomVariable):
    raise TypeError("output_key must have type RandomVariable.")

  # Create feed_dict for data placeholders that the model conditions
  # on; it is necessary for all session runs.
  feed_dict = {key: value for key, value in six.iteritems(data)
               if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type}

  # Form true data.
  y_true = data[output_key]
  # Make predictions (if there are any supervised metrics).
  if metrics != ['log_lik'] and metrics != ['log_likelihood']:
    binary_discrete = (Bernoulli, Binomial)
    categorical_discrete = (Categorical, Multinomial, OneHotCategorical)
    if isinstance(output_key, binary_discrete + categorical_discrete):
      # Average over realizations of their probabilities, then predict
      # via argmax over probabilities.
      probs = [sess.run(output_key.probs, feed_dict) for _ in range(n_samples)]
      probs = tf.add_n(probs) / tf.cast(n_samples, tf.float32)
      if isinstance(output_key, binary_discrete):
        # make random prediction whenever probs is exactly 0.5
        random = tf.random_uniform(shape=tf.shape(probs))
        y_pred = tf.round(tf.where(tf.equal(0.5, probs), random, probs))
      else:
        y_pred = tf.argmax(probs, len(probs.shape) - 1)
    else:
      # Monte Carlo estimate the mean of the posterior predictive.
      y_pred = [sess.run(output_key, feed_dict) for _ in range(n_samples)]
      y_pred = tf.cast(tf.add_n(y_pred), tf.float32) / \
          tf.cast(n_samples, tf.float32)

  # Evaluate y_true (according to y_pred if supervised) for all metrics.
  evaluations = []
  for metric in metrics:
    if metric == 'accuracy' or metric == 'crossentropy':
      # automate binary or sparse cat depending on its support
      support = sess.run(tf.reduce_max(y_true), feed_dict)
      if support <= 1:
        metric = 'binary_' + metric
      else:
        metric = 'sparse_categorical_' + metric

    if metric == 'binary_accuracy':
      evaluations += [binary_accuracy(y_true, y_pred)]
    elif metric == 'categorical_accuracy':
      evaluations += [categorical_accuracy(y_true, y_pred)]
    elif metric == 'sparse_categorical_accuracy':
      evaluations += [sparse_categorical_accuracy(y_true, y_pred)]
    elif metric == 'log_loss' or metric == 'binary_crossentropy':
      evaluations += [binary_crossentropy(y_true, y_pred)]
    elif metric == 'categorical_crossentropy':
      evaluations += [categorical_crossentropy(y_true, y_pred)]
    elif metric == 'sparse_categorical_crossentropy':
      evaluations += [sparse_categorical_crossentropy(y_true, y_pred)]
    elif metric == 'hinge':
      evaluations += [hinge(y_true, y_pred)]
    elif metric == 'squared_hinge':
      evaluations += [squared_hinge(y_true, y_pred)]
    elif (metric == 'mse' or metric == 'MSE' or
          metric == 'mean_squared_error'):
      evaluations += [mean_squared_error(y_true, y_pred)]
    elif (metric == 'mae' or metric == 'MAE' or
          metric == 'mean_absolute_error'):
      evaluations += [mean_absolute_error(y_true, y_pred)]
    elif (metric == 'mape' or metric == 'MAPE' or
          metric == 'mean_absolute_percentage_error'):
      evaluations += [mean_absolute_percentage_error(y_true, y_pred)]
    elif (metric == 'msle' or metric == 'MSLE' or
          metric == 'mean_squared_logarithmic_error'):
      evaluations += [mean_squared_logarithmic_error(y_true, y_pred)]
    elif metric == 'poisson':
      evaluations += [poisson(y_true, y_pred)]
    elif metric == 'cosine' or metric == 'cosine_proximity':
      evaluations += [cosine_proximity(y_true, y_pred)]
    elif metric == 'log_lik' or metric == 'log_likelihood':
      # Monte Carlo estimate the log-density of the posterior predictive.
      tensor = tf.reduce_mean(output_key.log_prob(y_true))
      log_pred = [sess.run(tensor, feed_dict) for _ in range(n_samples)]
      log_pred = tf.add_n(log_pred) / tf.cast(n_samples, tf.float32)
      evaluations += [log_pred]
    else:
      raise NotImplementedError("Metric is not implemented: {}".format(metric))

  if len(evaluations) == 1:
    return sess.run(evaluations[0], feed_dict)
  else:
    return sess.run(evaluations, feed_dict)
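
The generic `'accuracy'` and `'crossentropy'` names above are resolved from the support of `y_true`: labels in {0, 1} dispatch to the binary metric, larger integer labels to the sparse categorical one. A hedged pure-Python sketch of that dispatch (`resolve_metric` is illustrative, not an Edward function):

```python
import numpy as np

def resolve_metric(metric, y_true):
    """Mirror the dispatch in `evaluate`: pick the concrete metric name."""
    if metric in ('accuracy', 'crossentropy'):
        prefix = 'binary_' if np.max(y_true) <= 1 else 'sparse_categorical_'
        return prefix + metric
    return metric

print(resolve_metric('accuracy', np.array([0, 1, 1, 0])))   # binary_accuracy
print(resolve_metric('crossentropy', np.array([0, 3, 2])))  # sparse_categorical_crossentropy
```
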
Example #11
  def test(self):
    with self.test_session():
      x = Normal(0.0, 1.0)
      qx = Normal(0.0, 1.0)
      x_ph = tf.placeholder(tf.float32, [])

      check_data({x: tf.constant(0.0)})
      check_data({x: np.float64(0.0)})
      check_data({x: np.int64(0)})
      check_data({x: 0.0})
      check_data({x: 0})
      check_data({x: False})
      check_data({x: '0'})
      check_data({x: x_ph})
      check_data({x: qx})
      check_data({2.0 * x: tf.constant(0.0)})
      self.assertRaises(TypeError, check_data, {0.0: x})
      self.assertRaises(TypeError, check_data, {x: tf.zeros(5)})
      self.assertRaises(TypeError, check_data, {x_ph: x})
      self.assertRaises(TypeError, check_data, {x_ph: x})
      self.assertRaises(TypeError, check_data,
                        {x: tf.constant(0, tf.float64)})
      self.assertRaises(TypeError, check_data,
                        {x_ph: tf.constant(0.0)})

      x_vec = Normal(tf.constant([0.0]), tf.constant([1.0]))
      qx_vec = Normal(tf.constant([0.0]), tf.constant([1.0]))

      check_data({x_vec: qx_vec})
      check_data({x_vec: [0.0]})
      check_data({x_vec: [0]})
      check_data({x_vec: ['0']})
      self.assertRaises(TypeError, check_data, {x: qx_vec})