Example #1
def marginal_maxent_generic(dist, rvs, **kwargs):
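    """
    Compute the maximum entropy distribution whose marginals over the groups
    of random variables in `rvs` match those of `dist`.

    The reduced problem is solved with the Frank-Wolfe algorithm. `rv_mode`
    is consumed here; any remaining keyword arguments (such as `verbose`)
    are forwarded to `frank_wolfe`.

    Returns the optimized pmf as a NumPy array together with the final value
    of the objective (the negentropy).

    """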
    from cvxopt import matrix

    verbose = kwargs.get('verbose', False)
    logger = basic_logger('dit.maxentropy', verbose)

    rv_mode = kwargs.pop('rv_mode', None)

    A, b = marginal_constraints_generic(dist, rvs, rv_mode)

    # Reduce the size of A so that only nonzero elements are searched.
    # Also make it full rank.
    variables = isolate_zeros_generic(dist, rvs)
    Asmall = A[:, variables.nonzero]  # pylint: disable=no-member
    Asmall, b, rank = as_full_rank(Asmall, b)
    Asmall = matrix(Asmall)
    b = matrix(b)

    # Set cvx info level based on logging.INFO level.
    if logger.isEnabledFor(logging.INFO):
        show_progress = True
    else:
        show_progress = False

    logger.info("Finding initial distribution.")
    initial_x, _ = initial_point_generic(dist, rvs, A=Asmall, b=b,
                                         isolated=variables,
                                         show_progress=show_progress)
    initial_x = matrix(initial_x)
    objective = negentropy

    # We optimize the reduced problem.

    # For the gradient, we keep the elements we know to be zero at zero.
    # In general, the gradient of sum_i x_i log2(x_i) is log2(x_i) + 1 / ln(2).
    nonzero = variables.nonzero  # pylint: disable=no-member
    ln2 = np.log(2)

    def gradient(x):
        # This operates only on nonzero elements.

        xarr = np.asarray(x)
        # All of the optimization elements should be greater than zero,
        # but occasionally they might go slightly negative or exactly zero.
        # In those cases, we set the gradient to zero and keep the value
        # fixed from that point forward.
        bad_x = xarr <= 0
        grad = np.log2(xarr) + 1 / ln2
        grad[bad_x] = 0
        return matrix(grad)

    logger.info("Finding maximum entropy distribution.")
    x, obj = frank_wolfe(objective, gradient, Asmall, b, initial_x, **kwargs)
    x = np.asarray(x).transpose()[0]

    # Rebuild the full distribution.
    xfinal = np.zeros(A.shape[1])
    xfinal[nonzero] = x

    return xfinal, obj  # , Asmall, b, variables
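
A minimal usage sketch for the function above. It assumes dit and cvxopt are installed and that marginal_maxent_generic (as defined above) is in scope; the XOR distribution and the call to make_dense() are illustrative assumptions, not part of the original example.

import dit

# XOR distribution: the third variable is the XOR of the first two.
d = dit.Distribution(['000', '011', '101', '110'], [1 / 4] * 4)
d.make_dense()  # make the pmf explicit over the full product space

# Maximize entropy subject to matching every pairwise marginal of d.
rvs = [[0, 1], [0, 2], [1, 2]]
pmf, obj = marginal_maxent_generic(d, rvs)

# pmf is a NumPy array over the dense outcome space; obj is the final
# negentropy, which here should be close to -3 bits, since the uniform
# distribution on all eight outcomes matches every pairwise marginal of XOR.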
Example #2
def frank_wolfe(objective, gradient, A, b, initial_x,
                maxiters=2000, tol=1e-4, clean=True, verbose=None):
    """
    Uses the Frank--Wolfe algorithm to minimize the convex objective.

    Minimization is subject to the linear equality constraint: A x = b.

    Assumes x should be nonnegative.

    Parameters
    ----------
    objective : callable
        The objective function. It should receive a ``cvxopt`` matrix for the
        input `x` and return the value of the objective evaluated at `x`.
    gradient : callable
        The gradient function. It should receive a ``cvxopt`` matrix for the
        input `x` and return the value of the gradient evaluated at `x`.
    A : matrix
        A ``cvxopt`` matrix specifying the LHS linear equality constraints.
    b : matrix
        A ``cvxopt`` matrix specifying the RHS linear equality constraints.
    initial_x : matrix
        A ``cvxopt`` matrix specifying the initial `x` to use.
    maxiters : int
        The maximum number of iterations to perform. If convergence was not
        reached after the last iteration, a warning is issued and the current
        value of `x` is returned.
    tol : float
        The tolerance used to determine when we have converged to the optimum.
    clean : bool
        Occasionally, the iteration process will drive values that should be
        nonnegative to be ever so slightly negative. If ``True``, then such
        values are forcibly set to zero and the vector is renormalized. This is
        an application-specific decision and is probably not more generally
        useful.
    verbose : int
        An integer representing the logging level, as in the ``logging`` module.
        If `None`, then (effectively) the log level is set to `WARNING`. For
        a bit more information, set this to `logging.INFO`. For a bit less,
        set this to `logging.ERROR`, or perhaps 100.

    """
    # Function level import to avoid circular import.
    from dit.algorithms.optutil import op_runner

    # Function level import to keep cvxopt dependency optional.
    # All variables should be cvxopt variables, not NumPy arrays
    from cvxopt import matrix
    from cvxopt.modeling import variable

    # Set up a custom logger.
    logger = basic_logger('dit.frankwolfe', verbose)

    # Set cvx info level based on logging.DEBUG level.
    if logger.isEnabledFor(logging.DEBUG):
        show_progress = True
    else:
        show_progress = False

    assert (A.size[1] == initial_x.size[0])

    n = initial_x.size[0]
    x = initial_x
    xdiff = 0

    TOL = 1e-7
    verbosechunk = max(maxiters // 10, 1)
    for i in range(maxiters):
        obj = objective(x)
        grad = gradient(x)

        xbar = variable(n)

        new_objective = grad.T * xbar
        constraints = []
        constraints.append((xbar >= 0))
        constraints.append((-TOL <= A * xbar - b))
        constraints.append((A * xbar - b <= TOL))

        logger.debug('FW Iteration: {}'.format(i))
        opt = op_runner(new_objective, constraints, show_progress=show_progress)
        if opt.status != 'optimal':
            msg = '\tFrank-Wolfe: Did not find optimal direction on '
            msg += 'iteration {}: {}'
            msg = msg.format(i, opt.status)
            logger.info(msg)

        # Calculate optimality gap
        xbar_opt = opt.variables()[0].value
        opt_bd = grad.T * (xbar_opt - x)

        msg = "i={:6}  obj={:10.7f}  opt_bd={:10.7f}  xdiff={:12.10f}"
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(msg.format(i, obj, opt_bd[0, 0], xdiff))
            logger.debug("")
        elif i % verbosechunk == 0:
            logger.info(msg.format(i, obj, opt_bd[0, 0], xdiff))

        # Frank-Wolfe update: step from x toward the LP vertex xbar_opt with
        # the standard step size 2 / (i + 2).
        xnew = (i * x + 2 * xbar_opt) / (i + 2)
        xdiff = np.linalg.norm(xnew - x)
        x = xnew

        if xdiff < tol:
            obj = objective(x)
            break
    else:
        msg = "Only converged to xdiff={:12.10f} after {} iterations. "
        msg += "Desired: {}"
        logger.warning(msg.format(xdiff, maxiters, tol))

    xopt = np.array(x)

    if clean:
        xopt[np.abs(xopt) < tol] = 0
        xopt /= xopt.sum()

    return xopt, obj
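
A minimal usage sketch for frank_wolfe, assuming the function above is in scope and that dit (for the internal op_runner helper), numpy, and cvxopt are installed. The toy problem (maximizing entropy over a four-outcome simplex) and the helper functions below are illustrative assumptions, not part of the original example.

import numpy as np
from cvxopt import matrix

# Minimize the negentropy sum_i x_i * log2(x_i) subject to sum_i x_i = 1.
def objective(x):
    xarr = np.asarray(x).ravel()
    xarr = xarr[xarr > 0]
    return float(np.sum(xarr * np.log2(xarr)))

def gradient(x):
    xarr = np.asarray(x).ravel()
    grad = np.zeros_like(xarr)
    positive = xarr > 0
    grad[positive] = np.log2(xarr[positive]) + 1 / np.log(2)
    return matrix(grad)

A = matrix(np.ones((1, 4)))  # single equality constraint: probabilities sum to 1
b = matrix(np.ones(1))
initial_x = matrix(np.array([0.7, 0.1, 0.1, 0.1]))

xopt, obj = frank_wolfe(objective, gradient, A, b, initial_x, maxiters=500)
# xopt should approach the uniform distribution [0.25, 0.25, 0.25, 0.25]
# and obj should approach -2 (bits).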
Example #3
    def __init__(self, dist, sources, target, k=2, rv_mode=None,
                 extra_constraints=True, source_marginal=False, tol=None,
                 prng=None, verbose=None):
        """
        Initialize an optimizer for the partial information framework.

        Parameters
        ----------
        dist : distribution
            The distribution used to calculate the partial information.
        sources : list of lists
            The source random variables. Each element is a list of random
            variables in `dist` that together define one source.
        target : list
            The random variables in `dist` that define the target.
        k : int
            The size of the marginals that are constrained to equal marginals
            from `dist`. For the calculation of unique information, we use k=2.
            Note that these marginals include the target random variable.
        rv_mode : str, None
            Specifies how to interpret the elements of each source and the
            target. Valid options are: {'indices', 'names'}. If equal to
            'indices', then the elements of each source and the target are
            interpreted as random variable indices. If equal to 'names', the
            elements are interpreted as random variable names. If `None`, then
            the value of `dist._rv_mode` is consulted.
        extra_constraints : bool
            When possible, additional constraints beyond the required marginal
            constraints are added to the optimization problem. Such constraints
            exist for values of the inputs and outputs that satisfy
            p(inputs | outputs) = 1; in that case, p(inputs, outputs) is equal
            to q(inputs, outputs) for every q in the feasible set.
        source_marginal : bool
            If `True`, also require that the source marginal distribution
            p(X_1, ..., X_n) is matched. This will yield a distribution such
            that S^k := H(q) - H(p) is the information that is not captured
            by matching the k-way marginals that include the target. For k = 1,
            S^1 is the mutual information between the sources and the target.
        tol : float | None
            The desired convergence tolerance.
        prng : RandomState
            A NumPy-compatible pseudorandom number generator.
        verbose : int
            An integer representing the logging level, as in the ``logging``
            module. If `None`, then (effectively) the log level is set to
            `WARNING`. For a bit more information, set this to `logging.INFO`.
            For a bit less, set this to `logging.ERROR`, or perhaps 100.

        """
        self.logger = basic_logger('dit.pid_broja', verbose)

        # Store the original parameters in case we want to construct an
        # "uncoalesced" distribution from the optimial distribution.
        self.dist_original = dist
        self._params = Bunch(sources=sources, target=target, rv_mode=rv_mode)

        self.dist = prepare_dist(dist, sources, target, rv_mode=rv_mode)
        self.k = k
        self.extra_constraints = extra_constraints
        self.source_marginal = source_marginal
        self.verbose = verbose

        super(MaximumConditionalEntropy, self).__init__(self.dist, tol=tol, prng=prng)
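
A minimal construction sketch, assuming dit is installed and that the MaximumConditionalEntropy class this initializer belongs to is importable; the XOR distribution and the choice of sources and target are illustrative assumptions, not part of the original example.

import dit

# XOR distribution: the last variable is the XOR of the first two.
d = dit.Distribution(['000', '011', '101', '110'], [1 / 4] * 4)

# Two single-variable sources and a single-variable target, with the default
# pairwise (k=2) marginal constraints used for unique information.
opt = MaximumConditionalEntropy(d, sources=[[0], [1]], target=[2], k=2)

# The optimization itself is driven by the base class and is not shown here.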