Example #1
    def maximize_likelihood(self, obsTime, shiftStart, shiftEnd):

        x0 = np.array((12., 1.))
        if not hasattr(obsTime, "__iter__") or len(np.unique(obsTime)) == 1:
            # Degenerate case: a single (or single repeated) observation.
            # Pin the location to that value with a very concentrated fit.
            optResult = optimize.OptimizeResult()
            optResult.x = np.array([np.atleast_1d(obsTime)[0], 1000.])
            optResult.fun = -np.inf
            optResult.success = True
        elif hasattr(obsTime, "__iter__") and len(np.unique(obsTime)) == 0:
            # No observations at all: fall back to the initial guess.
            optResult = optimize.OptimizeResult()
            optResult.x = x0
            optResult.fun = np.nan
            optResult.success = True

        else:

            ineqConstr1 = lambda coeff: coeff
            ineqConstr2 = lambda coeff: 24 - coeff[0]

            optResult = optimize.minimize(self.neg_log_likelihood,
                                          x0, (obsTime, shiftStart, shiftEnd),
                                          method='SLSQP',
                                          constraints=({
                                              "type": "ineq",
                                              "fun": ineqConstr1
                                          }, {
                                              "type": "ineq",
                                              "fun": ineqConstr2
                                          }),
                                          options={
                                              'disp': False,
                                              'ftol': 1e-08
                                          })

        print(optResult)

        if optResult.fun < 0:
            self.neg_log_likelihood(optResult.x, obsTime, shiftStart, shiftEnd)

        if not optResult.success:
            raise RuntimeError("Optimization was not successful")

        self.location, self.kappa = optResult.x
        self.AIC = 2 * (optResult.fun + 2)
        self.negLL = optResult.fun

        print("AIC:", self.AIC)

        return optResult
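
The constrained SLSQP call above (inequality constraints supplied as a tuple of dicts) can be exercised on its own; a minimal, self-contained sketch with an illustrative stand-in objective rather than the class's neg_log_likelihood:

import numpy as np
from scipy import optimize

def toy_objective(coeff):
    # illustrative stand-in for neg_log_likelihood, minimized near (12, 1)
    return (coeff[0] - 12.0) ** 2 + (coeff[1] - 1.0) ** 2

x0 = np.array([6.0, 5.0])
res = optimize.minimize(toy_objective, x0, method='SLSQP',
                        constraints=({"type": "ineq", "fun": lambda coeff: coeff},           # coeff >= 0
                                     {"type": "ineq", "fun": lambda coeff: 24 - coeff[0]}),  # coeff[0] <= 24
                        options={'disp': False, 'ftol': 1e-08})
print(res.x, res.fun, res.success)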
Example #2
def coord_descent(fun, init, args, **kwargs):
    """Cyclic coordinate descent: repeatedly minimize ``fun`` along one
    coordinate at a time with ``scipy.optimize.minimize_scalar``."""
    maxiter = kwargs['maxiter']
    x = init.copy()

    def coord_opt(alpha, scales, i):
        if alpha < 0:
            result = 1e6
        else:
            scales[i] = alpha
            result = fun(scales)

        return result

    nfev = 0
    for j in range(maxiter):
        for i in range(len(x)):
            print("Optimizing variable {}".format(i))
            r = opt.minimize_scalar(lambda alpha: coord_opt(alpha, x, i))
            nfev += r.nfev
            opt_alpha = r.x
            x[i] = opt_alpha

        if 'callback' in kwargs:
            kwargs['callback'](x)

    res = opt.OptimizeResult()
    res.x = x
    res.nit = maxiter
    res.nfev = nfev
    res.fun = np.array([r.fun])
    res.success = True

    return res
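
A minimal usage sketch for coord_descent above, assuming the module-level imports it relies on (numpy as np, scipy.optimize as opt) are in place; note that maxiter must be supplied as a keyword and the unused args argument is positional:

import numpy as np

def separable(scales):
    # simple separable objective with minimum at (1, 2, 3)
    return float(np.sum((scales - np.array([1.0, 2.0, 3.0])) ** 2))

res = coord_descent(separable, np.array([5.0, 5.0, 5.0]), (), maxiter=5,
                    callback=lambda x: print("current x:", x))
print(res.x, res.nit, res.nfev)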
Example #3
        def custmin(fun, bracket, args=(), maxfev=None, stepsize=0.1,
                maxiter=100, callback=None, **options):
            bestx = (bracket[1] + bracket[0]) / 2.0
            besty = fun(bestx, *args)
            funcalls = 1
            niter = 0
            improved = True
            stop = False

            while improved and not stop and niter < maxiter:
                improved = False
                niter += 1
                for testx in [bestx - stepsize, bestx + stepsize]:
                    testy = fun(testx, *args)
                    funcalls += 1
                    if testy < besty:
                        besty = testy
                        bestx = testx
                        improved = True
                if callback is not None:
                    callback(bestx)
                if maxfev is not None and funcalls >= maxfev:
                    stop = True
                    break

            return optimize.OptimizeResult(fun=besty, x=bestx, nit=niter,
                                           nfev=funcalls, success=(niter > 1))
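
This custom scalar minimizer follows the signature SciPy documents for a callable method, so it can be handed directly to optimize.minimize_scalar; a minimal sketch, assuming custmin above is in scope:

from scipy import optimize

def f(x):
    return (x - 2.0) ** 2 + 1.0

res = optimize.minimize_scalar(f, bracket=(0.0, 4.0), method=custmin,
                               options=dict(stepsize=0.05))
print(res.x, res.fun, res.nit)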
Example #4
 def mcddp(x0, u_init, callback):
     "Monte-Carlo DDP"
     rtol = 1e-2
     niter_same = 0
     niter_same_max = 2
     c_best = cost(u_init.flatten(), x0)
     u_best = u_init
     niter_max = 10
     for i in range(niter_max):
         #log.info('mcddp adding noise')
         std = 1e-5 / dt * np.sqrt(model.u_upper - model.u_lower)
         u_i = u_best.copy()
         u_i = np.random.normal(u_best, std, size=u_i.shape)
         u_i = np.clip(u_i, model.u_lower, model.u_upper)
         #log.info('mcddp solving')
         _, u_i = ddp_solve(x0, initial=u_i, dt=dt, callback=callback)
         #log.info('mcddp final state:\n%s', model.state_rep(step_array(x0, u_i, dt=dt)[-1]))
         c = cost(u_i.flatten(), x0)
         #log.info('mcddp final cost: %.5g', c)
         niter_same += 1
         if c < c_best:
             #log.info('mcddp improved best solution, i: %d, c: %.5g, '
             #         '%%ch: %.5g, std: %s', i, c, (c_best-c)/abs(c_best), std)
             if (c_best - c) / abs(c_best) > rtol:
                 niter_same = 0
             c_best, u_best = c, u_i.copy()
             if callback_opt:
                 callback_opt(x0, u_best)
         if niter_same >= niter_same_max:
             break
     return optimize.OptimizeResult(success=True, x=u_best)
Example #5
    def _optimize(self, objective):
        """
        Select the random point with the minimum objective function value.

        Parameters
        ----------
        :param objective: objective function to minimize

        Returns
        -------
        :return: optimal parameter found by the optimizer (scipy format)
        """
        points = self._get_eval_points()

        if self.matrix_to_vector_transform is not None:
            # Transform the sampled matrix points in vectors
            points = np.array([
                self.matrix_to_vector_transform(points[i])
                for i in range(self._nb_samples)
            ])

        evaluations = objective(points)
        idx_best = np.argmin(evaluations, axis=0)

        return sc_opt.OptimizeResult(x=points[idx_best, :],
                                     success=True,
                                     fun=evaluations[idx_best, :],
                                     nfev=points.shape[0],
                                     message="OK")
Example #6
        def custmin(fun, x0, args=(), maxfev=None, stepsize=0.1,
                maxiter=100, callback=None, **options):
            bestx = x0
            besty = fun(x0, *args)
            funcalls = 1
            niter = 0
            improved = True
            stop = False

            while improved and not stop and niter < maxiter:
                improved = False
                niter += 1
                for dim in range(np.size(x0)):
                    for s in [bestx[dim] - stepsize, bestx[dim] + stepsize]:
                        testx = np.copy(bestx)
                        testx[dim] = s
                        testy = fun(testx, *args)
                        funcalls += 1
                        if testy < besty:
                            besty = testy
                            bestx = testx
                            improved = True
                    if callback is not None:
                        callback(bestx)
                    if maxfev is not None and funcalls >= maxfev:
                        stop = True
                        break

            return optimize.OptimizeResult(fun=besty, x=bestx, nit=niter,
                                           nfev=funcalls, success=(niter > 1))
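
Like the scalar variant in Example #3, this coordinate-stepping custmin matches the callable-method interface of optimize.minimize; a minimal sketch on the built-in Rosenbrock function, assuming custmin above is in scope:

import numpy as np
from scipy import optimize

x0 = np.array([1.35, 0.9, 0.8, 1.1, 1.2])
res = optimize.minimize(optimize.rosen, x0, method=custmin,
                        options=dict(stepsize=0.05))
print(res.x, res.nit)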
Example #7
def fit_lstsq(f,
              x0,
              jac=None,
              tol=1e-8,
              delta=None,
              iterations=None,
              callback=None,
              rcond=1e-2,
              lstsq=None):
    """Fit objective function ``f(x) = y`` using a naive repeated linear
    least-squares fit."""
    dx = 0
    for nit in count():
        y0 = f(x0)
        if callback is not None:
            chisq = reduced_chisq(y0)
            callback(
                sciopt.OptimizeResult(x=x0,
                                      fun=y0,
                                      chisq=chisq,
                                      nit=nit,
                                      dx=dx,
                                      success=False,
                                      message="In progress."))
        if nit > 0 and np.allclose(dx, 0, atol=tol):
            message = "Reached convergence"
            success = True
            break
        if iterations is not None and nit > iterations:
            message = "Reached max number of iterations"
            success = False
            break
        dx, dy = fit_lstsq_oneshot(lstsq,
                                   f,
                                   x0,
                                   y0=y0,
                                   jac=jac,
                                   delta=delta,
                                   rcond=rcond)
        x0 += dx
    chisq = reduced_chisq(y0)
    return sciopt.OptimizeResult(x=x0,
                                 fun=y0,
                                 chisq=chisq,
                                 nit=nit,
                                 success=success,
                                 message=message)
Example #8
 def local_optimization_step(fun, x0, *losargs, **loskwargs):
     loss_before = loss_fn(x0)
     inner_opt(constraint_solve, constraint_check, variables, bounds, args)
     r = spo.OptimizeResult()
     r.x, _, _ = vars_to_x(variables)
     loss_after = constraint_solve.to_diffsat(cache=True).loss(args)
     r.success = not (loss_before == loss_after and not constraint_check.to_diffsat(cache=True).satisfy(args))
     r.fun = loss_after
     return r
Example #9
def lm(fun, x0, jac, args=(), kwargs={}, ftol=1e-6, max_nfev=10000, x_scale=None,
       geodesic_accel=False, uphill_steps=False):
    LAM_UP = 1.5
    LAM_DOWN = 5.

    if x_scale is None:
        x_scale = np.ones(x0.shape[0], dtype=np.float64)

    x = x0
    xs = x / x_scale
    lam = 100.

    r = fun(x, *args, **kwargs)
    C = dot(r, r) / 2
    Js = jac(x, *args, **kwargs) * x_scale[newaxis, :]
    dC = dot(Js.T, r)
    JsTJs = dot(Js.T, Js)
    assert r.shape[0] == Js.shape[0]

    I = np.eye(Js.shape[1])

    for step in range(max_nfev):
        xs_new = xs - solve(JsTJs + lam * I, dC)
        x_new = xs_new * x_scale

        r_new = fun(x_new, *args, **kwargs)
        C_new = dot(r_new, r_new) / 2
        print('trying step: size {:.3g}, C {:.3g}, lam {:.3g}'.format(
            norm(x - x_new), C_new, lam
        ))
        # print(x - x_new)
        if C_new >= C:
            lam *= LAM_UP
            if lam >= 1e6: break
            continue

        relative_err = abs(C - C_new) / C
        if relative_err <= ftol:
            break

        xs = xs_new
        print(xs)
        x = xs * x_scale
        r = r_new

        C = C_new

        if C < 1e-6: break

        Js = jac(x, *args, **kwargs) * x_scale[newaxis, :]
        dC = dot(Js.T, r)
        JsTJs = dot(Js.T, Js)
        lam /= LAM_DOWN

    return opt.OptimizeResult(x=x, fun=r)
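
A minimal usage sketch for the Levenberg-Marquardt routine above, assuming its module-level names (numpy's dot/newaxis and solve/norm from numpy or scipy linalg, plus scipy.optimize as opt) resolve as usual; fun returns the residual vector and jac its Jacobian:

import numpy as np

t = np.linspace(0.0, 1.0, 20)
y_obs = 3.0 * t + 0.5

def residuals(p):
    # residuals of the linear model p[0] * t + p[1]
    return p[0] * t + p[1] - y_obs

def jacobian(p):
    return np.stack([t, np.ones_like(t)], axis=1)

res = lm(residuals, np.array([1.0, 0.0]), jacobian)
print(res.x)  # should approach [3.0, 0.5]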
Example #10
def pure_random_search(function=fn.eggholder,
                       start_coordinates=[0, 0],
                       iterations=100000,
                       bounds=[(-512, 512), (-512, 512)],
                       show_plots=True):
    # defining the number of steps
    n = iterations
    #creating two array for containing x and y coordinate
    #of size equals to the number of size and filled up with 0's
    x = np.zeros(n)
    y = np.zeros(n)
    # set initial coordinates
    x[0], y[0] = start_coordinates[0], start_coordinates[1]
    # set minimum
    minimum = function(start_coordinates)
    best_point = start_coordinates
    # filling the coordinates with random variables
    count = 0
    iter_to_best = [0]
    f_points = [minimum]
    for i in range(1, n):  # use those steps
        x[i] = np.random.uniform(low=bounds[0][0], high=bounds[0][1])
        y[i] = np.random.uniform(low=bounds[1][0], high=bounds[1][1])
        #check if current point is better than current minimum
        curr_point = [x[i], y[i]]
        f_curr_point = function(curr_point)
        if f_curr_point <= minimum:
            f_points.append(f_curr_point)
            iter_to_best.append(count)
            minimum = f_curr_point
            best_point = curr_point
        count += 1
    #insert last iteration f_point
    iter_to_best.append(n)
    f_points.append(f_points[-1])
    #create an optResult object
    result = optimize.OptimizeResult(x=best_point,
                                     fun=minimum,
                                     iter_to_best=iter_to_best,
                                     f_points=f_points)
    #print('true iterations: ', count)
    if show_plots:
        # plotting stuff:
        pylab.title("Pure Random Search ($n = " + str(n) + "$ steps)")
        pylab.plot(x, y, 'o', ms=0.1)
        #pylab.savefig("Pure_Random_Search"+str(n)+".png",bbox_inches="tight",dpi=600)
        pylab.show()
    return result


#pure_random_search()
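
A minimal usage sketch for pure_random_search above with plotting disabled and an illustrative objective substituted for the default fn.eggholder (numpy, pylab and scipy.optimize are assumed to be imported at module level as the function expects):

import numpy as np

def sphere(p):
    # simple convex objective with minimum at the origin
    return p[0] ** 2 + p[1] ** 2

res = pure_random_search(function=sphere,
                         start_coordinates=[400.0, -400.0],
                         iterations=5000,
                         bounds=[(-512, 512), (-512, 512)],
                         show_plots=False)
print(res.x, res.fun)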
Example #11
def _res2scipy(result, history):
    ret = spopt.OptimizeResult()

    # the following values need refinement
    ret.success = True
    ret.status = 0
    ret.message = 'completed'

    # the following are proper
    ret.x = result.optpar.copy()
    ret.fun = result.optval
    ret.nfev = len(history)

    return ret
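
The converter above only needs an object exposing optpar and optval plus an iterable history; a minimal sketch with a hypothetical stand-in result (FakeResult is purely illustrative, and spopt is assumed to be scipy.optimize):

import numpy as np
from collections import namedtuple

FakeResult = namedtuple("FakeResult", ["optpar", "optval"])
raw = FakeResult(optpar=np.array([1.0, 2.0]), optval=0.25)

ret = _res2scipy(raw, history=[0.9, 0.5, 0.25])
print(ret.x, ret.fun, ret.nfev)  # [1. 2.] 0.25 3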
Example #12
def single_objective(parameters_guess,
                     bounds,
                     fit_bead,
                     fit_parameter_names,
                     exp_dict,
                     global_opts={}):
    r"""
    Evaluate parameter set for equation of state with given experimental data

    Parameters
    ----------
    parameters_guess : numpy.ndarray 
        An array of initial guesses for parameters.
    bounds : list[tuple]
        List of length equal to fit_parameter_names with lists of pairs for minimum and maximum bounds of parameter being fit. Defaults from Eos object are broad, so we recommend specification.
    fit_bead : str
        Name of bead whose parameters are being fit.
    fit_parameter_names : list[str]
        This list contains the name of the parameter being fit (e.g. epsilon). See EOS documentation for supported parameter names. Cross interaction parameter names should be composed of parameter name and the other bead type, separated by an underscore (e.g. epsilon_CO2).
    exp_dict : dict
        Dictionary of experimental data objects.
    global_opts : dict, Optional, default={}
        This dictionary is included for continuity with other global optimization methods, although this method doesn't have options.

    Returns
    -------
    Objective : obj
        scipy OptimizedResult object

    """

    if len(global_opts) > 0:
        logger.info(
            "The fitting method 'single_objective' does not have further options"
        )

    obj_value = ff.compute_obj(parameters_guess, fit_bead, fit_parameter_names,
                               exp_dict, bounds)

    result = spo.OptimizeResult(
        x=parameters_guess,
        fun=obj_value,
        success=True,
        nit=0,
        message=
        "Successfully computed objective function for provided parameter set.",
    )

    return result
Example #13
 def ddp_minimizer(cost, u, args, *a, callback=None, **kw):
     n = u.shape[0] // action_shape[0]
     U = u.reshape((n, ) + action_shape)
     (x0, ) = args
     try:
         X, U = ddp_solve(x0,
                          dt=dt,
                          callback=callback,
                          initial=U,
                          atol=5e0,
                          λ_base=3.0,
                          ln_λ=0,
                          ln_λ_max=15,
                          iter_max=500)
     except:
         log.exception('ddp solve failed')
     return optimize.OptimizeResult(x=U.flatten(),
                                    fun=cost(U.flatten(), x0),
                                    success=True)
Example #14
def minimize_pgd_madry(closure,
                       x0,
                       prox,
                       lmo,
                       step=None,
                       max_iter=200,
                       prox_args=(),
                       callback=None):
    x = x0.detach().clone()
    batch_size = x.size(0)

    if step is None:
        # estimate lipschitz constant
        # TODO: this is not the optimal step-size (if there even is one.)
        # I don't recommend using this.
        L = utils.init_lipschitz(closure, x0)
        step_size = 1. / L

    elif isinstance(step, Number):
        step_size = torch.ones(batch_size, device=x.device) * step

    elif isinstance(step, torch.Tensor):
        step_size = step

    else:
        raise ValueError(
            f"step must be a number or a torch Tensor, got {step} instead")

    for it in range(max_iter):
        x.requires_grad = True
        _, grad = closure(x)
        with torch.no_grad():
            update_direction, _ = lmo(-grad, x)
            update_direction += x
            x = prox(x + utils.bmul(step_size, update_direction), step_size,
                     *prox_args)

        if callback is not None:
            if callback(locals()) is False:
                break

    fval, grad = closure(x)
    return optimize.OptimizeResult(x=x, nit=it, fval=fval, grad=grad)
Example #15
def _scipy_minimize(minimizer,
                    f,
                    x0,
                    delta=None,
                    jac=None,
                    callback=None,
                    **kwargs):
    state = sciopt.OptimizeResult(x=x0,
                                  fun=None,
                                  chisq=None,
                                  nit=0,
                                  success=False,
                                  message="In progress.")

    def callback_wrapper(x, *_):
        state.nit += 1
        state.dx = x - state.x
        state.x = x
        state.fun = f(x)
        state.chisq = reduced_chisq(state.fun)
        callback(state)

    def obj_fun(x):
        return reduced_chisq(f(x))

    if jac is None and delta is not None:
        jac = partial(jac_twopoint, obj_fun, delta=delta)

    result = minimizer(obj_fun,
                       x0,
                       jac=jac,
                       callback=callback and callback_wrapper,
                       **kwargs)
    result.fun = f(result.x)
    result.chisq = reduced_chisq(result.fun)
    return result
Example #16
    def _optimize(self, objective):
        """
        Minimize the objective function

        Parameters
        ----------
        :param objective: objective function to minimize

        Returns
        -------
        :return: optimal parameter found by the optimization (scipy format)
        """
        # Initial value
        initial = self.get_initial()[0]

        if self.vector_to_matrix_transform is not None:
            initial = self.vector_to_matrix_transform(initial)

        if self.solver_type == 'NelderMead' or self.solver_type == 'ParticleSwarm':
            initial = None

        # Create tensorflow variable
        if self.matrix_manifold_dimension is None:
            x_tf = tf.Variable(tf.zeros(self.dimension, dtype=tf.float64))
        else:
            x_tf = tf.Variable(
                tf.zeros([
                    self.matrix_manifold_dimension,
                    self.matrix_manifold_dimension
                ],
                         dtype=tf.float64))

        # Cost function for pymanopt
        def objective_fct(x):
            if self.matrix_to_vector_transform_tf is not None:
                # Reshape x from matrix to vector form to compute the objective function (tensorflow format)
                x = self.matrix_to_vector_transform_tf(
                    x, self.matrix_manifold_dimension)
            return objective(x)[0]

        # Transform the cost function to tensorflow function
        cost = tf.py_function(objective_fct, [x_tf], tf.float64)

        # Gradient function for pymanopt
        def objective_grad(x):
            if self.matrix_to_vector_transform is not None:
                # Reshape x from matrix to vector form to compute the gradient
                x = self.matrix_to_vector_transform(x)

            # Compute the gradient
            grad = np.array(objective(x)[1])[0]

            if self.vector_to_matrix_transform is not None:
                # Reshape the gradient in matrix form for the optimization on the manifold
                grad = self.vector_to_matrix_transform(grad)
            return grad

        # Define pymanopt problem
        problem = pyman.Problem(manifold=self.manifold,
                                cost=cost,
                                egrad=objective_grad,
                                arg=x_tf,
                                verbosity=2)

        # Optimize the parameters of the problem
        opt_x, opt_log = self.solver.solve(problem, x=initial)

        if self.matrix_to_vector_transform is not None:
            # Reshape the optimum from matrix to vector form
            opt_x = self.matrix_to_vector_transform(opt_x)

        # Format the result to fit with GPflowOpt
        result = sc_opt.OptimizeResult(
            x=opt_x,
            fun=opt_log['final_values']['f(x)'],
            nit=opt_log['final_values']['iterations'],
            message=opt_log['stoppingreason'],
            success=True)

        return result
Example #17
def minimize_frank_wolfe(
    f_grad,
    x0,
    lmo,
    step="backtracking",
    lipschitz=None,
    max_iter=400,
    tol=1e-12,
    callback=None,
    verbose=0,
):
    r"""Frank-Wolfe algorithm.

  Implements the Frank-Wolfe algorithm, see :ref:`frank_wolfe` for
  a more detailed description.

  Args:
    f_grad: callable
      Takes as input the current iterate (a vector of same size as x0) and
      returns the function value and gradient of the objective function.
      It should accept the optional argument return_gradient, and when False
      it should return only the function value.

    x0: array-like
      Initial guess for solution.

    lmo: callable
      Takes as input a vector u of same size as x0 and returns both the update
      direction and the maximum admissible step-size.

    step: str or callable, optional
      Step-size strategy to use. Should be one of

        - "backtracking", will use the backtracking line-search from [1]_

        - "DR", will use the Demyanov-Rubinov step-size. This step-size minimizes
        a quadratic upper bound of the objective using the gradient's Lipschitz
        constant, passed in keyword argument `lipschitz`.

        - "sublinear", will use a decreasing step-size of the form 2/(k+2).

        - callable, if step is a callable function, it will use the step-size
            returned by step(locals).

    lipschitz: None or float, optional
      Estimate for the Lipschitz constant of the gradient. Required when step="DR".

    max_iter: integer, optional
      Maximum number of iterations.

    tol: float, optional
      Tolerance of the stopping criterion. The algorithm will stop whenever
      the Frank-Wolfe gap is below tol or the maximum number of iterations
      is exceeded.

    callback: callable, optional
      Callback to execute at each iteration. If the callable returns False
      then the algorithm will immediately return.

    verbose: int, optional
      Verbosity level.


  Returns:
    res : scipy.optimize.OptimizeResult
      The optimization result represented as a
      ``scipy.optimize.OptimizeResult`` object. Important attributes are:
      ``x`` the solution array, ``success`` a Boolean flag indicating if
      the optimizer exited successfully and ``message`` which describes
      the cause of the termination. See `scipy.optimize.OptimizeResult`
      for a description of other attributes.


  References:
    [1] Jaggi, Martin. `"Revisiting Frank-Wolfe: Projection-Free Sparse Convex
    Optimization." <http://proceedings.mlr.press/v28/jaggi13-supp.pdf>`_
    ICML 2013.

    [2] Pedregosa, Fabian `"Notes on the Frank-Wolfe Algorithm"
    <http://fa.bianp.net/blog/2018/notes-on-the-frank-wolfe-algorithm-part-i/>`_,
    2018

    [3] Pedregosa, Fabian, Armin Askari, Geoffrey Negiar, and Martin Jaggi.
    `"Step-Size Adaptivity in Projection-Free Optimization."
    <https://arxiv.org/pdf/1806.05123.pdf>`_ arXiv:1806.05123 (2018).


  Examples:
    * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_sparse_benchmark.py`
    * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_vertex_overlap.py`
  """
    x0 = np.asanyarray(x0, dtype=float)
    if tol < 0:
        raise ValueError("Tol must be non-negative")
    x = x0.copy()
    lipschitz_t = None
    step_size = None
    if lipschitz is not None:
        lipschitz_t = lipschitz

    f_t, grad = f_grad(x)
    old_f_t = None

    it = 0
    for it in range(max_iter):
        update_direction, max_step_size = lmo(-grad, x)
        norm_update_direction = linalg.norm(update_direction) ** 2
        certificate = np.dot(update_direction, -grad)

        # .. compute an initial estimate for the ..
        # .. Lipschitz estimate if not given ...
        if lipschitz_t is None:
            eps = 1e-3
            grad_eps = f_grad(x + eps * update_direction)[1]
            lipschitz_t = linalg.norm(grad - grad_eps) / (
                eps * np.sqrt(norm_update_direction)
            )
            print("Estimated L_t = %s" % lipschitz_t)

        if certificate <= tol:
            break
        if hasattr(step, "__call__"):
            step_size = step(locals())
            f_next, grad_next = f_grad(x + step_size * update_direction)
        elif step == "backtracking":
            step_size, lipschitz_t, f_next, grad_next = backtracking_step_size(
                x,
                f_t,
                old_f_t,
                f_grad,
                certificate,
                lipschitz_t,
                max_step_size,
                update_direction,
                norm_update_direction,
            )
        elif step == "DR":
            if lipschitz is None:
                raise ValueError('lipschitz needs to be specified with step="DR"')
            step_size = min(
                certificate / (norm_update_direction * lipschitz_t), max_step_size
            )
            f_next, grad_next = f_grad(x + step_size * update_direction)
        elif step == "oblivious":
            # .. without knowledge of the Lipschitz constant ..
            # .. we take the oblivious 2/(k+2) step-size ..
            step_size = 2.0 / (it + 2)
            f_next, grad_next = f_grad(x + step_size * update_direction)
        else:
            raise ValueError("Invalid option step=%s" % step)
        if callback is not None:
            callback(locals())
        x += step_size * update_direction

        old_f_t = f_t
        f_t, grad = f_next, grad_next
    if callback is not None:
        callback(locals())
    return optimize.OptimizeResult(x=x, nit=it, certificate=certificate)
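
A minimal usage sketch for minimize_frank_wolfe above, using the "oblivious" 2/(k+2) step so the backtracking helper referenced elsewhere is not needed; the L1-ball LMO below is illustrative (lmo(u, x) returns the direction s - x toward the vertex s maximizing <u, s>, together with the maximum admissible step size):

import numpy as np

def f_grad(x, b=np.array([0.6, -0.3])):
    # smooth objective 0.5 * ||x - b||^2 and its gradient
    diff = x - b
    return 0.5 * diff.dot(diff), diff

def l1_ball_lmo(u, x, radius=1.0):
    # vertex of the l1 ball of the given radius that maximizes <u, s>
    idx = np.argmax(np.abs(u))
    s = np.zeros_like(x)
    s[idx] = radius * np.sign(u[idx])
    return s - x, 1.0

res = minimize_frank_wolfe(f_grad, np.zeros(2), l1_ball_lmo,
                           step="oblivious", max_iter=200)
print(res.x, res.certificate)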
Example #18
def minimize_PGD(f,
                 g=None,
                 x0=None,
                 tol=1e-6,
                 max_iter=500,
                 verbose=0,
                 callback=None,
                 backtracking: bool = True,
                 step_size=None,
                 max_iter_backtracking=100,
                 backtracking_factor=0.6,
                 trace=False) -> optimize.OptimizeResult:
    """Proximal gradient descent.

    Solves problems of the form

            minimize_x f(x) + g(x)

    where we have access to the gradient of f and to the proximal operator of g.

    Arguments:
        f : loss function (smooth)

        g : penalty term (proximal)

        x0 : array-like, optional
            Initial guess

        backtracking : boolean
            Whether to perform backtracking (i.e. line-search) or not.

        max_iter : int
            Maximum number of iterations.

        verbose : int
            Verbosity level, from 0 (no output) to 2 (output on each iteration)

        step_size : float
            Starting value for the line-search procedure. XXX

        callback : callable
            callback function (optional).

    Returns:
        res : The optimization result represented as a
            ``scipy.optimize.OptimizeResult`` object. Important attributes are:
            ``x`` the solution array, ``success`` a Boolean flag indicating if
            the optimizer exited successfully and ``message`` which describes
            the cause of the termination. See `scipy.optimize.OptimizeResult`
            for a description of other attributes.

    References:
        Beck, Amir, and Marc Teboulle. "Gradient-based algorithms with applications to signal
        recovery." Convex optimization in signal processing and communications (2009)
    """
    if x0 is None:
        xk = np.zeros(f.n_features)
    else:
        xk = np.array(x0, copy=True)
    if not max_iter_backtracking > 0:
        raise ValueError('Line search iterations need to be greater than 0')
    if g is None:
        g = ZeroLoss()

    if step_size is None:
        # sample to estimate Lipschitz constant
        step_size_n_sample = 5
        L = []
        for _ in range(step_size_n_sample):
            x_tmp = np.random.randn(f.n_features)
            x_tmp /= linalg.norm(x_tmp)
            L.append(linalg.norm(f(xk) - f(x_tmp)))
        # give it a generous upper bound
        step_size = 2. / np.mean(L)

    success = False
    trace_func = []
    trace_time = []
    trace_x = []
    start_time = datetime.now()

    it = 1
    # .. a while loop instead of a for loop ..
    # .. allows for infinite or floating point max_iter ..

    if trace:
        trace_x.append(xk.copy())
        trace_func.append(f(xk) + g(xk))
        trace_time.append((datetime.now() - start_time).total_seconds())

    while it <= max_iter:
        # .. compute gradient and step size
        current_step_size = step_size
        grad_fk = f.gradient(xk)
        x_next = g.prox(xk - current_step_size * grad_fk, current_step_size)
        incr = x_next - xk
        if backtracking:
            fk = f(xk)
            f_next = f(x_next)
            for _ in range(max_iter_backtracking):
                if f_next <= fk + grad_fk.dot(
                        incr) + incr.dot(incr) / (2.0 * current_step_size):
                    # .. step size found ..
                    break
                else:
                    # .. backtracking, reduce step size ..
                    current_step_size *= backtracking_factor
                    x_next = g.prox(xk - current_step_size * grad_fk,
                                    current_step_size)
                    incr = x_next - xk
                    f_next = f(x_next)
            else:
                warnings.warn(
                    "Maxium number of line-search iterations reached")
        certificate = np.linalg.norm((xk - x_next) / step_size)
        xk[:] = x_next

        if trace:
            trace_x.append(xk.copy())
            trace_func.append(f(xk) + g(xk))
            trace_time.append((datetime.now() - start_time).total_seconds())

        if verbose > 0:
            print("Iteration %s, step size: %s" % (it, step_size))

        if certificate < tol:
            if verbose:
                print("Achieved relative tolerance at iteration %s" % it)
            success = True
            break

        if callback is not None:
            callback(xk)
        it += 1
    if it >= max_iter:
        warnings.warn(
            "proximal_gradient did not reach the desired tolerance level",
            RuntimeWarning)

    return optimize.OptimizeResult(x=xk,
                                   success=success,
                                   certificate=certificate,
                                   nit=it,
                                   trace_x=np.array(trace_x),
                                   trace_func=np.array(trace_func),
                                   trace_time=trace_time)
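
A minimal usage sketch for minimize_PGD above, assuming its module-level imports (numpy/scipy linalg, datetime, warnings, optimize and the ZeroLoss helper) are in place; the loss and penalty classes below are illustrative stand-ins providing the callable/gradient/prox interface the solver expects:

import numpy as np

class QuadraticLoss:
    """Smooth loss 0.5 * ||x - b||^2 with the attributes minimize_PGD uses."""
    def __init__(self, b):
        self.b = np.asarray(b, dtype=float)
        self.n_features = self.b.size
    def __call__(self, x):
        return 0.5 * np.dot(x - self.b, x - self.b)
    def gradient(self, x):
        return x - self.b

class L1Penalty:
    """Non-smooth penalty lam * ||x||_1 with a soft-thresholding prox."""
    def __init__(self, lam):
        self.lam = lam
    def __call__(self, x):
        return self.lam * np.abs(x).sum()
    def prox(self, x, step_size):
        return np.sign(x) * np.maximum(np.abs(x) - self.lam * step_size, 0.0)

res = minimize_PGD(QuadraticLoss([1.0, -3.0, 0.2]), L1Penalty(0.1),
                   x0=np.zeros(3), tol=1e-8)
print(res.x, res.success, res.nit)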
Example #19
def minimize_frank_wolfe(closure,
                         x0,
                         lmo,
                         step='sublinear',
                         max_iter=200,
                         callback=None):
    """Performs the Frank-Wolfe algorithm on a batch of objectives of the form
      min_x f(x)
      s.t. x in C

    where we have access to the Linear Minimization Oracle (LMO) of the constraint set C,
    and the gradient of f through closure.

    Args:
      closure: callable
        gives function values and the jacobian of f.

      x0: torch.Tensor of shape (batch_size, *).
        initial guess

      lmo: callable
        Returns update_direction, max_step_size

      step: float or 'sublinear'
        step-size scheme to be used.

      max_iter: int
        max number of iterations.

      callback: callable
        (optional) Any callable called on locals() at the end of each iteration.
        Often used for logging.
    """
    x = x0.detach().clone()
    batch_size = x.size(0)
    if not (isinstance(step, Number) or step == 'sublinear'):
        raise ValueError("step must be a float or 'sublinear'.")

    if isinstance(step, Number):
        step_size = step * torch.ones(
            batch_size, device=x.device, dtype=x.dtype)

    cert = np.inf * torch.ones(batch_size, device=x.device)

    for it in range(max_iter):

        x.requires_grad = True
        fval, grad = closure(x)
        update_direction, max_step_size = lmo(-grad, x)
        cert = utils.bdot(-grad, update_direction)

        if step == 'sublinear':
            step_size = 2. / (it + 2) * torch.ones(
                batch_size, dtype=x.dtype, device=x.device)

        with torch.no_grad():
            step_size = torch.min(step_size, max_step_size)
            x += utils.bmul(update_direction, step_size)

        if callback is not None:
            if callback(locals()) is False:
                break

    fval, grad = closure(x)
    return optimize.OptimizeResult(x=x,
                                   nit=it,
                                   fval=fval,
                                   grad=grad,
                                   certificate=cert)
Example #20
def minimize_vrtos(
    f_deriv,
    A,
    b,
    x0,
    step_size,
    prox_1=None,
    prox_2=None,
    alpha=0,
    max_iter=500,
    tol=1e-6,
    callback=None,
    verbose=0,
):
    r"""Variance-reduced three operator splitting (VRTOS) algorithm.

    The VRTOS algorithm can solve optimization problems of the form

        argmin_{x \in R^p} \sum_{i=1}^{n_samples} f(A_i^T x, b_i)
            + alpha * ||x||_2^2 + pen1(x) + pen2(x)

    Parameters
    ----------
    f_deriv
        derivative of f

    x0: np.ndarray or None, optional
        Starting point for optimization.

    step_size: float or None, optional
        Step size for the optimization. If None is given, this will be
        estimated from the function f.

    n_jobs: int
        Number of threads to use in the optimization. A number higher than 1
        will use the Asynchronous SAGA optimization method described in
        [Pedregosa et al., 2017]

    max_iter: int
        Maximum number of passes through the data in the optimization.

    tol: float
        Tolerance criterion. The algorithm will stop whenever the norm of the
        gradient mapping (generalization of the gradient for nonsmooth
        optimization)
        is below tol.

    verbose: bool
        Verbosity level. True might print some messages.

    trace: bool
        Whether to trace convergence of the function, useful for plotting and/or
        debugging. If yes, the result will have extra members trace_func,
        trace_time.

    Returns
    -------
    opt: OptimizeResult
        The optimization result represented as a
        ``scipy.optimize.OptimizeResult`` object. Important attributes are:
        ``x`` the solution array, ``success`` a Boolean flag indicating if
        the optimizer exited successfully and ``message`` which describes
        the cause of the termination. See `scipy.optimize.OptimizeResult`
        for a description of other attributes.

    References
    ----------
    Pedregosa, Fabian, Kilian Fatras, and Mattia Casotto. "Variance Reduced
    Three Operator Splitting." arXiv preprint arXiv:1806.07294 (2018).
    """

    n_samples, n_features = A.shape
    success = False

    # FIXME: just a workaround for now
    # FIXME: check if prox_1 is a tuple
    if hasattr(prox_1, "__len__") and len(prox_1) == 2:
        blocks_1 = prox_1[1]
        prox_1 = prox_1[0]
    else:
        blocks_1 = sparse.eye(n_features, n_features, format="csr")
    if hasattr(prox_2, "__len__") and len(prox_2) == 2:
        blocks_2 = prox_2[1]
        prox_2 = prox_2[0]
    else:
        blocks_2 = sparse.eye(n_features, n_features, format="csr")

    Y = np.zeros((2, x0.size))
    z = x0.copy()

    assert A.shape[0] == b.size

    if step_size < 0:
        raise ValueError

    if prox_1 is None:

        @utils.njit
        def prox_1(x, i, indices, indptr, d, step_size):
            pass

    if prox_2 is None:

        @utils.njit
        def prox_2(x, i, indices, indptr, d, step_size):
            pass

    A = sparse.csr_matrix(A)
    epoch_iteration = _factory_sparse_vrtos(f_deriv, prox_1, prox_2, blocks_1,
                                            blocks_2, A, b, alpha, step_size)

    # .. memory terms ..
    memory_gradient = np.zeros(n_samples)
    gradient_average = np.zeros(n_features)
    x1 = x0.copy()
    grad_tmp = np.zeros(n_features)

    # warm up for the JIT
    epoch_iteration(
        Y,
        x0,
        x1,
        z,
        memory_gradient,
        gradient_average,
        np.array([0]),
        grad_tmp,
        step_size,
    )

    # .. iterate on epochs ..
    if callback is not None:
        callback(locals())
    for it in range(max_iter):
        epoch_iteration(
            Y,
            x0,
            x1,
            z,
            memory_gradient,
            gradient_average,
            np.random.permutation(n_samples),
            grad_tmp,
            step_size,
        )

        certificate = np.linalg.norm(x0 - z) + np.linalg.norm(x1 - z)
        if callback is not None:
            callback(locals())

    return optimize.OptimizeResult(x=z,
                                   success=success,
                                   nit=it,
                                   certificate=certificate)
Example #21
def minimize_three_split(closure,
                         x0,
                         prox1=None,
                         prox2=None,
                         tol=1e-6,
                         max_iter=1000,
                         verbose=0,
                         callback=None,
                         line_search=True,
                         step=None,
                         max_iter_backtracking=100,
                         backtracking_factor=0.7,
                         h_Lipschitz=None,
                         *args_prox):
    """Davis-Yin three operator splitting method.
    This algorithm can solve problems of the form

                minimize_x f(x) + g(x) + h(x)

    where f is a smooth function and g and h are (possibly non-smooth)
    functions for which the proximal operator is known.

    Remark: this method returns x = prox1(...). If g and h are two indicator
      functions, this method only guarantees that x is feasible for the first.
      Therefore if one of the constraints is a hard constraint,
      make sure to pass it to prox1.

    Args:
      closure: callable
        Returns the function values and gradient of the objective function.
        With return_gradient=False, returns only the function values.
        Shape of return value: (batch_size, *)

      x0 : torch.Tensor(shape: (batch_size, *))
        Initial guess

      prox1 : callable or None
        prox1(x, step_size, *args) returns the proximal operator of g at x
        with parameter step_size.
        step_size can be a scalar or of shape (batch_size,).

      prox2 : callable or None
        prox2(x, step_size, *args) returns the proximal operator of h at x
        with parameter step_size.
        step_size can be a scalar or of shape (batch_size,).

      tol: float
        Tolerance of the stopping criterion.

      max_iter : int
        Maximum number of iterations.

      verbose : int
        Verbosity level, from 0 (no output) to 2 (output on each iteration)

      callback : callable.
        callback function (optional).
        Called with locals() at each step of the algorithm.
        The algorithm will exit if callback returns False.

      line_search : boolean
        Whether to perform line-search to estimate the step sizes.

      step : float or tensor(shape: (batch_size,)) or None
        Starting value(s) for the line-search procedure.
        if None, step_size will be estimated for each datapoint in the batch.

      max_iter_backtracking: int
        maximum number of backtracking iterations.  Used in line search.

      backtracking_factor: float
        the amount to backtrack by during line search.

      args_prox: iterable
        (optional) Extra arguments passed to the prox functions.

      kwargs_prox: dict
        (optional) Extra keyword arguments passed to the prox functions.


    Returns:
      res : OptimizeResult
        The optimization result represented as a
        ``scipy.optimize.OptimizeResult`` object. Important attributes are:
        ``x`` the solution tensor, ``success`` a Boolean flag indicating if
        the optimizer exited successfully and ``message`` which describes
        the cause of the termination. See `scipy.optimize.OptimizeResult`
        for a description of other attributes.
    """

    success = torch.zeros(x0.size(0), dtype=bool)
    if not max_iter_backtracking > 0:
        raise ValueError("Line search iterations need to be greater than 0")

    LS_EPS = np.finfo(float).eps

    if prox1 is None:

        @torch.no_grad()
        def prox1(x, s=None, *args):
            return x

    if prox2 is None:

        @torch.no_grad()
        def prox2(x, s=None, *args):
            return x

    x = x0.detach().clone().requires_grad_(True)
    batch_size = x.size(0)

    if step is None:
        line_search = True
        step_size = 1.0 / utils.init_lipschitz(closure, x)

    elif isinstance(step, Number):
        step_size = step * torch.ones(
            batch_size, device=x.device, dtype=x.dtype)

    else:
        raise ValueError("step must be float or None.")

    z = prox2(x, step_size, *args_prox)
    z = z.clone().detach()
    z.requires_grad_(True)

    fval, grad = closure(z)

    x = prox1(z - utils.bmul(step_size, grad), step_size, *args_prox)
    u = torch.zeros_like(x)

    for it in range(max_iter):
        z.requires_grad_(True)
        fval, grad = closure(z)
        with torch.no_grad():
            x = prox1(z - utils.bmul(step_size, u + grad), step_size,
                      *args_prox)
            incr = x - z
            norm_incr = torch.norm(incr.view(incr.size(0), -1), dim=-1)
            rhs = fval + utils.bdot(grad, incr) + ((norm_incr**2) /
                                                   (2 * step_size))
            ls_tol = closure(x, return_jac=False)
            mask = torch.bitwise_and(norm_incr > 1e-7, line_search)
            ls = mask.detach().clone()
            # TODO: optimize code in this loop using mask
            for it_ls in range(max_iter_backtracking):
                if not (mask.any()):
                    break
                rhs[mask] = fval[mask] + utils.bdot(grad[mask], incr[mask])
                rhs[mask] += utils.bmul(norm_incr[mask]**2,
                                        1. / (2 * step_size[mask]))

                ls_tol[mask] = closure(x, return_jac=False)[mask] - rhs[mask]
                mask &= (ls_tol > LS_EPS)
                step_size[mask] *= backtracking_factor

            z = prox2(x + utils.bmul(step_size, u), step_size, *args_prox)
            u += utils.bmul(x - z, 1. / step_size)
            certificate = utils.bmul(norm_incr, 1. / step_size)

        if callback is not None:
            if callback(locals()) is False:
                break

        success = torch.bitwise_and(certificate < tol, it > 0)
        if success.all():
            break

    return optimize.OptimizeResult(x=x,
                                   success=success,
                                   nit=it,
                                   fval=fval,
                                   certificate=certificate)
Example #22
def run_station(config_file, waveform_file, network, station, location,
                logger):
    """Runner for analysis of single station. For multiple stations, set up config file to run batch
    job using mpi_job CLI.

    The output file is in HDF5 format. The configuration details are added to the output file for traceability.

    :param config_file: Config filename specifying job settings
    :type config_file: str or pathlib.Path
    :param waveform_file: Event waveform source file for seismograms, generated using `extract_event_traces.py` script
    :type waveform_file: str or pathlib.Path
    :param network: Network code of station to analyse
    :type network: str
    :param station: Station code to analyse
    :type station: str
    :param location: Location code of station to analyse. Can be '' (empty string) if not set.
    :type location: str
    :param logger: Output logging instance
    :type logger: logging.Logger
    :return: Pair containing (solution, configuration) containers. Configuration will have additional traceability
        information.
    :rtype: (solution, dict)
    """
    with open(config_file, 'r') as cf:
        config = json.load(cf)
    # end with
    # logger.info("Config:\n{}".format(json.dumps(config, indent=4)))
    station_id = "{}.{}.{}".format(network, station, location)
    logger.info("Network.Station.Location: {}".format(station_id))
    config.update({"station_id": station_id})

    stype = config['solver']['type']
    if stype.lower() == 'mcmc':
        runner = run_mcmc
    else:
        logger.error("Unknown solver type: {}".format(stype))
        return (None, config)
    # end if

    # Load input data
    logger.info('Ingesting waveform file {}'.format(waveform_file))
    waveform_data = NetworkEventDataset(waveform_file,
                                        network=network,
                                        station=station,
                                        location=location)
    config.update({"waveform_file": waveform_file})

    # Trim entire dataset to max time window required.
    time_window = config["su_energy_opts"]["time_window"]
    # Trim streams to time window
    waveform_data.apply(
        lambda stream: stream.trim(stream[0].stats.onset + time_window[0],
                                   stream[0].stats.onset + time_window[1]))

    # Curate input data if curation options given
    if "curation_opts" in config:
        curation_opts = config["curation_opts"]
        if curation_opts:
            curate_seismograms(waveform_data, curation_opts, logger)
        # end if
    # end if

    try:
        # Ordering of seismograms important here, since storage of sequential values in solution
        # depend on it. Here the input seismograms are ordered by event ID.
        soln = runner(waveform_data.station(station).values(), config, logger)
    except Exception as e:
        logger.error('Runner failed on station {}'.format(station_id))
        logger.exception(e)
        soln = optimize.OptimizeResult()
        soln.success = False
        soln.message = str(e)
    # end try

    # Add ordered event IDs so source waveforms can be re-extracted later
    # from source file if necessary.
    try:
        ordered_event_ids = [
            st[0].stats.event_id
            for st in waveform_data.station(station).values()
        ]
    except Exception as e:
        logger.error(
            'Event ID collection failed on station {}'.format(station_id))
        logger.exception(e)
        ordered_event_ids = []
    # end try
    config.update({"event_ids": ordered_event_ids})

    return soln, config
Example #23
def minimize(X, f, length, *varargin):

    realmin = np.finfo(np.double).tiny
    INT = 0.1  #don't reevaluate within 0.1 of the limit of the current bracket
    EXT = 3.0  #extrapolate maximum 3 times the current step-size
    MAX = 20  #max 20 function evaluations per line search
    RATIO = 10  #maximum allowed slope ratio
    SIG = 0.1
    RHO = SIG / 2  #SIG and RHO are the constants controlling the Wolfe-
    #Powell conditions. SIG is the maximum allowed absolute ratio between
    #previous and new slopes (derivatives in the search direction), thus setting
    #SIG to low (positive) values forces higher precision in the line-searches.
    #RHO is the minimum allowed fraction of the expected improvement (from the
    #slope at the initial point in the linesearch). Constants must satisfy 0 < RHO < SIG < 1.
    #Tuning of SIG (depending on the nature of the function to be optimized) may
    #speed up the minimization; it is probably not worth playing much with RHO.

    #The code falls naturally into 3 parts, after the initial line search is
    #started in the direction of steepest descent. 1) we first enter a while loop
    #which uses point 1 (p1) and (p2) to compute an extrapolation (p3), until we
    #have extrapolated far enough (Wolfe-Powell conditions). 2) if necessary, we
    #enter the second loop which takes p2, p3 and p4 chooses the subinterval
    #containing a (local) minimum, and interpolates it, until an acceptable point
    #is found (Wolfe-Powell conditions). Note, that points are always maintained
    #in order p0 <= p1 <= p2 < p3 < p4. 3) compute a new search direction using
    #conjugate gradients (Polack-Ribiere flavour), or revert to steepest if there
    #was a problem in the previous line-search. Return the best value so far, if
    #two consecutive line-searches fail, or whenever we run out of function
    #evaluations or line-searches. During extrapolation, the "f" function may fail
    #either with an error or returning Nan or Inf, and minimize should handle this
    #gracefully.

    red = 1.0
    if length > 0: S = 'Linesearch'
    else: S = 'Function evaluation'

    funcalls = 0
    i = 0  #zero the run length counter
    ls_failed = False  #no previous line search has failed

    f0, df0 = f(X, *varargin)  #get function value and gradient
    funcalls += 1

    #print S, 'iteration', i, 'Value: %4.6e'%f0
    fX = [f0]
    if (length < 0): i += 1  #count epochs?!
    s = -df0
    d0 = -s.dot(s)  #initial search direction (steepest) and slope
    x3 = red / (1 - d0)  #initial step is red/(|s|+1)

    while (i < np.abs(length)):
        if (length > 0): i += 1  #count epochs?!
        X0 = X.copy()
        F0 = f0
        dF0 = df0.copy()
        M = (MAX if (length > 0) else np.minimum(MAX, -length - i))
        while True:
            x2 = 0
            f2 = f0
            d2 = d0
            f3 = f0
            df3 = df0.copy()
            success = False
            while (not success and M > 0):
                try:
                    M -= 1
                    if (length < 0): i += 1
                    f3, df3 = f(X + x3 * s, *varargin)
                    funcalls += 1
                    if (np.isnan(f3) or np.isinf(f3)): raise Exception('')
                    success = True
                except:
                    x3 = (x2 + x3) / 2.0  #bisect and try again

            if (f3 < F0):  #keep best values
                X0 = X + x3 * s
                F0 = f3
                dF0 = df3.copy()
            d3 = df3.dot(s)  #new slope
            if (d3 > SIG * d0 or f3 > f0 + x3 * RHO * d0 or M == 0):
                break  #are we done extrapolating?

            x1 = x2
            f1 = f2
            d1 = d2
            # move point 2 to point 1
            x2 = x3
            f2 = f3
            d2 = d3
            # move point 3 to point 2
            A = 6 * (f1 - f2) + 3 * (d2 + d1) * (x2 - x1)
            # make cubic extrapolation
            B = 3 * (f2 - f1) - (2 * d1 + d2) * (x2 - x1)
            x3 = x1 - d1 * (x2 - x1)**2 / (B + np.sqrt(B * B - A * d1 *
                                                       (x2 - x1))
                                           )  # num. error possible, ok!
            if (not np.isreal(x3) or np.isnan(x3) or np.isinf(x3) or x3 < 0):
                x3 = x2 * EXT  # num prob | wrong sign?
            elif (x3 > x2 * EXT):
                x3 = x2 * EXT  # new point beyond extrapolation limit?	 extrapolate maximum amount
            elif (x3 < x2 + INT * (x2 - x1)):
                x3 = x2 + INT * (x2 - x1
                                 )  # new point too close to previous point?

        while ((np.abs(d3) > -SIG * d0 or f3 > f0 + x3 * RHO * d0)
               and M > 0):  # keep interpolating
            if (d3 > 0 or f3 > f0 + x3 * RHO * d0):  # choose subinterval
                x4 = x3
                f4 = f3
                d4 = d3
                # move point 3 to point 4
            else:
                x2 = x3
                f2 = f3
                d2 = d3
                # move point 3 to point 2
            if (f4 > f0):
                x3 = x2 - (0.5 * d2 * (x4 - x2)**2) / (
                    f4 - f2 - d2 * (x4 - x2))  # quadratic interpolation
            else:
                A = 6 * (f2 - f4) / (x4 - x2) + 3 * (d4 + d2)
                # cubic interpolation
                B = 3 * (f4 - f2) - (2 * d2 + d4) * (x4 - x2)
                x3 = x2 + (np.sqrt(B * B - A * d2 * (x4 - x2)**2) - B) / A
                # num. error possible, ok!
            if (np.isnan(x3) or np.isinf(x3)):
                x3 = (x2 + x4) / 2
                # if we had a numerical problem then bisect
            x3 = np.maximum(np.minimum(x3, x4 - INT * (x4 - x2)),
                            x2 + INT * (x4 - x2))
            # don't accept too close

            f3, df3 = f(X + x3 * s, *varargin)
            funcalls += 1
            if (f3 < F0):  # keep best values
                X0 = X + x3 * s
                F0 = f3
                dF0 = df3.copy()
            M -= 1
            if (length < 0): i += 1  # count epochs?!
            d3 = df3.dot(s)  # new slope

        if (np.abs(d3) < -SIG * d0
                and f3 < f0 + x3 * RHO * d0):  # if line search succeeded
            X = X + x3 * s
            f0 = f3
            fX.append(f0)  # update variables
            #print S, i, 'Value: %4.6e'%f0
            s = (df3.dot(df3) - df0.dot(df3)) / (
                df0.dot(df0)) * s - df3  # Polack-Ribiere CG direction
            df0 = df3.copy()  # swap derivatives
            d3 = d0
            d0 = df0.dot(s)
            if (d0 > 0):  # new slope must be negative
                s = -df0
                d0 = -s.dot(s)  # otherwise use steepest direction
            x3 *= np.minimum(RATIO,
                             d3 / (d0 - realmin))  # slope ratio but max RATIO
            ls_failed = False  # this line search did not fail
        else:
            X = X0
            f0 = F0
            df0 = dF0.copy()  # restore best point so far
            if (ls_failed or
                    i > np.abs(length)):  # line search failed twice in a row
                break  # or we ran out of time, so we give up
            s = -df0
            d0 = -s.dot(s)
            # try steepest
            x3 = 1.0 / (1.0 - d0)
            ls_failed = True  # this line search failed

    #print S, 'iteration', i, 'Value: %4.6e'%f0

    return optimize.OptimizeResult(fun=F0,
                                   x=X0,
                                   nit=i,
                                   nfev=funcalls,
                                   success=True,
                                   status=0,
                                   message='')
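
A minimal usage sketch for the conjugate-gradient minimize above; f must return the pair (value, gradient), and a positive length caps the number of line searches:

import numpy as np

def quad(x):
    # simple quadratic with minimum at [1, -2]; returns (value, gradient)
    b = np.array([1.0, -2.0])
    diff = x - b
    return 0.5 * diff.dot(diff), diff

res = minimize(np.array([10.0, 10.0]), quad, 20)
print(res.x, res.fun, res.nit, res.success)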
Example #24
def minimize_frank_wolfe(
        fun,
        x0,
        lmo,
        x0_rep=None,
        variant='vanilla',
        jac="2-point",
        step="backtracking",
        lipschitz=None,
        args=(),
        max_iter=400,
        tol=1e-12,
        callback=None,
        verbose=0,
        eps=1e-8,
):
    r"""Frank-Wolfe algorithm.

  Implements the Frank-Wolfe algorithm, see :ref:`frank_wolfe` for
  a more detailed description.

  Args:
    fun : callable
        The objective function to be minimized.
            ``fun(x, *args) -> float``
        where x is an 1-D array with shape (n,) and `args`
        is a tuple of the fixed parameters needed to completely
        specify the function.

    x0: array-like
      Initial guess for solution.

    lmo: callable
      Takes as input a vector u of same size as x0 and returns both the update
      direction and the maximum admissible step-size.
      
    x0_rep: immutable
        Is used to initialize the active set when variant == 'pairwise'.

    variant: {'vanilla', 'pairwise'}
        Determines which Frank-Wolfe variant to use, together with lmo.
        The pairwise variant sets up and updates an active set of vertices;
        this is needed to avoid moving out of the constraint set when using
        a pairwise LMO.

    jac : {callable,  '2-point', bool}, optional
        Method for computing the gradient vector. If it is a callable,
        it should be a function that returns the gradient vector:
            ``jac(x, *args) -> array_like, shape (n,)``
        where x is an array with shape (n,) and `args` is a tuple with
        the fixed parameters. Alternatively, '2-point' selects a finite
        difference scheme for numerical estimation of the gradient.
        If `jac` is a Boolean and is True, `fun` is assumed to return the
        gradient along with the objective function. If False, the gradient
        will be estimated using '2-point' finite difference estimation.

    step: str or callable, optional
      Step-size strategy to use. Should be one of

        - "backtracking", will use the backtracking line-search from [PANJ2020]_

        - "DR", will use the Demyanov-Rubinov step-size. This step-size minimizes a quadratic upper bound ob the objective using the gradient's lipschitz constant, passed in keyword argument `lipschitz`. [P2018]_

        - "sublinear", will use a decreasing step-size of the form 2/(k+2). [J2013]_

        - callable, if step is a callable function, it will use the step-size returned by ``step(locals())``.

    lipschitz: None or float, optional
      Estimate for the Lipschitz constant of the gradient. Required when step="DR".

    max_iter: integer, optional
      Maximum number of iterations.

    tol: float, optional
      Tolerance of the stopping criterion. The algorithm will stop whenever
      the Frank-Wolfe gap is below tol or the maximum number of iterations
      is exceeded.

    callback: callable, optional
      Callback to execute at each iteration. If the callable returns False
      then the algorithm will immediately return.

    eps: float or ndarray
        If jac is approximated, use this value for the step size.

    verbose: int, optional
      Verbosity level.


  Returns:
    scipy.optimize.OptimizeResult
      The optimization result represented as a
      ``scipy.optimize.OptimizeResult`` object. Important attributes are:
      ``x`` the solution array, ``success`` a Boolean flag indicating if
      the optimizer exited successfully and ``message`` which describes
      the cause of the termination. See `scipy.optimize.OptimizeResult`
      for a description of other attributes.


  References:

    .. [J2013] Jaggi, Martin. `"Revisiting Frank-Wolfe: Projection-Free Sparse Convex Optimization." <http://proceedings.mlr.press/v28/jaggi13-supp.pdf>`_ ICML 2013.

    .. [P2018] Pedregosa, Fabian `"Notes on the Frank-Wolfe Algorithm" <http://fa.bianp.net/blog/2018/notes-on-the-frank-wolfe-algorithm-part-i/>`_, 2018

    .. [PANJ2020] Pedregosa, Fabian, Armin Askari, Geoffrey Negiar, and Martin Jaggi. `"Step-Size Adaptivity in Projection-Free Optimization." <https://arxiv.org/pdf/1806.05123.pdf>`_ arXiv:1806.05123 (2020).


  Examples:
    * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_sparse_benchmark.py`
    * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_vertex_overlap.py`
  """
    x0 = np.asanyarray(x0, dtype=float)  # np.float alias is removed in recent NumPy
    if tol < 0:
        raise ValueError("Tol must be non-negative")
    x = x0.copy()

    if variant == 'vanilla':
        active_set = None
    elif variant == 'pairwise':
        active_set = defaultdict(float)
        active_set[x0_rep] = 1.

    else:
        raise ValueError("Variant must be one of {'vanilla', 'pairwise'}.")

    lipschitz_t = None
    step_size = None
    if lipschitz is not None:
        lipschitz_t = lipschitz

    func_and_grad = utils.build_func_grad(jac, fun, args, eps)

    f_t, grad = func_and_grad(x)
    old_f_t = None

    for it in range(max_iter):
        update_direction, fw_vertex_rep, away_vertex_rep, max_step_size = lmo(
            -grad, x, active_set)
        norm_update_direction = linalg.norm(update_direction)**2
        certificate = np.dot(update_direction, -grad)

        # .. compute an initial estimate for the ..
        # .. Lipschitz estimate if not given ...
        if lipschitz_t is None:
            eps = 1e-3
            grad_eps = func_and_grad(x + eps * update_direction)[1]
            lipschitz_t = linalg.norm(grad - grad_eps) / (
                eps * np.sqrt(norm_update_direction))
            print("Estimated L_t = %s" % lipschitz_t)

        if certificate <= tol:
            break
        if hasattr(step, "__call__"):
            step_size = step(locals())
            f_next, grad_next = func_and_grad(x + step_size * update_direction)
        elif step == "backtracking":
            step_size, lipschitz_t, f_next, grad_next = backtracking_step_size(
                x,
                f_t,
                old_f_t,
                func_and_grad,
                certificate,
                lipschitz_t,
                max_step_size,
                update_direction,
                norm_update_direction,
            )
        elif step == "DR":
            if lipschitz is None:
                raise ValueError(
                    'lipschitz needs to be specified with step="DR"')
            step_size = min(
                certificate / (norm_update_direction * lipschitz_t),
                max_step_size)
            f_next, grad_next = func_and_grad(x + step_size * update_direction)
        elif step == "sublinear":
            # .. without knowledge of the Lipschitz constant ..
            # .. we take the sublinear 2/(k+2) step-size ..
            step_size = 2.0 / (it + 2)
            f_next, grad_next = func_and_grad(x + step_size * update_direction)
        else:
            raise ValueError("Invalid option step=%s" % step)
        if callback is not None:
            if callback(locals()) is False:  # pylint: disable=g-bool-id-comparison
                break
        x += step_size * update_direction
        if variant == 'pairwise':
            update_active_set(active_set, fw_vertex_rep, away_vertex_rep,
                              step_size)
        old_f_t = f_t
        f_t, grad = f_next, grad_next
    if callback is not None:
        callback(locals())
    return optimize.OptimizeResult(x=x,
                                   nit=it,
                                   certificate=certificate,
                                   active_set=active_set)
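
# A minimal usage sketch for minimize_frank_wolfe (illustrative, not from the source).
# It assumes the function above is importable; the quadratic objective and the
# L1-ball linear minimization oracle below are hypothetical helpers.
import numpy as np


def quad_fun_and_grad(x):
    # f(x) = 0.5 * ||x - 1||^2, returned together with its gradient (matches jac=True)
    diff = x - 1.0
    return 0.5 * diff.dot(diff), diff


def l1_ball_lmo(neg_grad, x, active_set, radius=1.0):
    # Linear minimization oracle over the L1 ball: the minimizing vertex is
    # +/- radius * e_i, where i maximizes |neg_grad|.
    i = int(np.argmax(np.abs(neg_grad)))
    vertex = np.zeros_like(x)
    vertex[i] = radius * np.sign(neg_grad[i])
    # (direction, FW vertex rep, away vertex rep, max step) -- the vertex
    # representations are only used by the 'pairwise' variant.
    return vertex - x, None, None, 1.0


# res = minimize_frank_wolfe(quad_fun_and_grad, np.zeros(5), l1_ball_lmo,
#                            jac=True, step="sublinear")
# print(res.x, res.certificate)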
Example #25
0
def minimize_svrg(
    f_deriv,
    A,
    b,
    x0,
    step_size,
    alpha=0,
    prox=None,
    max_iter=500,
    tol=1e-6,
    verbose=False,
    callback=None,
):
    r"""Stochastic average gradient augmented (SAGA) algorithm.

    The SAGA algorithm can solve optimization problems of the form

        argmin_{x \in R^p} \sum_{i}^n_samples f(A_i^T x, b_i) + alpha *
        ||x||_2^2 +
                                            + beta * ||x||_1

    Args:
      f_deriv
          derivative of f

      x0: np.ndarray or None, optional
          Starting point for optimization.

      step_size: float or None, optional
          Step size for the optimization. If None is given, this will be
          estimated from the function f.

      n_jobs: int
          Number of threads to use in the optimization. A number higher than 1
          will use the Asynchronous SAGA optimization method described in
          [Pedregosa et al., 2017]

      max_iter: int
          Maximum number of passes through the data in the optimization.

      tol: float
          Tolerance criterion. The algorithm will stop whenever the norm of the
          gradient mapping (a generalization of the gradient for nonsmooth
          optimization) is below tol.

      verbose: bool
          Verbosity level. True might print some messages.

      trace: bool
          Whether to trace convergence of the function, useful for plotting
          and/or debugging. If yes, the result will have extra members
          trace_func and trace_time.


    Returns:
      opt: OptimizeResult
          The optimization result represented as a
          ``scipy.optimize.OptimizeResult`` object. Important attributes are:
          ``x`` the solution array, ``success`` a Boolean flag indicating if
          the optimizer exited successfully and ``message`` which describes
          the cause of the termination. See `scipy.optimize.OptimizeResult`
          for a description of other attributes.


    References:
      The SAGA algorithm was originally described in

      Aaron Defazio, Francis Bach, and Simon Lacoste-Julien. `SAGA: A fast
      incremental gradient method with support for non-strongly convex composite
      objectives. <https://arxiv.org/abs/1407.0202>`_ Advances in Neural
      Information Processing Systems. 2014.

      The implementation has some improvements with respect to the original,
      such as support for sparse datasets, and is described in

      Fabian Pedregosa, Remi Leblond, and Simon Lacoste-Julien.
      "Breaking the Nonsmooth Barrier: A Scalable Parallel Method
      for Composite Optimization." Advances in Neural Information
      Processing Systems (NIPS) 2017.
    """
    x = np.ascontiguousarray(x0).copy()
    n_samples, n_features = A.shape
    A = sparse.csr_matrix(A)

    if step_size is None:
        # automatic step-size selection (line search) is not implemented
        raise ValueError("step_size must be given explicitly")

    if hasattr(prox, "__len__") and len(prox) == 2:
        blocks = prox[1]
        prox = prox[0]
    else:
        blocks = sparse.eye(n_features, n_features, format="csr")

    if prox is None:

        @utils.njit
        def prox(x, i, indices, indptr, d, step_size):
            pass

    A_data = A.data
    A_indices = A.indices
    A_indptr = A.indptr
    n_samples, n_features = A.shape

    rblocks_indices = blocks.T.tocsr().indices
    blocks_indptr = blocks.indptr
    bs_data, bs_indices, bs_indptr = _support_matrix(A_indices, A_indptr,
                                                     rblocks_indices,
                                                     blocks.shape[0])
    csr_blocks_1 = sparse.csr_matrix((bs_data, bs_indices, bs_indptr))

    # .. diagonal reweighting ..
    d = np.array(csr_blocks_1.sum(0), dtype=float).ravel()
    idx = d != 0
    d[idx] = n_samples / d[idx]
    d[~idx] = 1

    @utils.njit
    def full_grad(x):
        grad = np.zeros(x.size)
        for i in range(n_samples):
            p = 0.0
            for j in range(A_indptr[i], A_indptr[i + 1]):
                j_idx = A_indices[j]
                p += x[j_idx] * A_data[j]
            grad_i = f_deriv(p, b[i])
            # .. accumulate the sample gradient into the full-gradient average ..
            for j in range(A_indptr[i], A_indptr[i + 1]):
                j_idx = A_indices[j]
                grad[j_idx] += grad_i * A_data[j] / n_samples
        return grad

    @utils.njit(nogil=True)
    def _svrg_epoch(x, x_snapshot, idx, gradient_average, grad_tmp, step_size):

        # .. inner iteration ..
        for i in idx:
            p = 0.0
            p_old = 0.0
            for j in range(A_indptr[i], A_indptr[i + 1]):
                j_idx = A_indices[j]
                p += x[j_idx] * A_data[j]
                p_old += x_snapshot[j_idx] * A_data[j]

            grad_i = f_deriv(p, b[i])
            old_grad_i = f_deriv(p_old, b[i])
            for j in range(A_indptr[i], A_indptr[i + 1]):
                j_idx = A_indices[j]
                grad_tmp[j_idx] = (grad_i - old_grad_i) * A_data[j]

            # .. update coefficients ..
            # .. first iterate on blocks ..
            for h_j in range(bs_indptr[i], bs_indptr[i + 1]):
                h = bs_indices[h_j]
                # .. then iterate on features inside block ..
                for b_j in range(blocks_indptr[h], blocks_indptr[h + 1]):
                    bias_term = d[h] * (gradient_average[b_j] + alpha * x[b_j])
                    x[b_j] -= step_size * (grad_tmp[b_j] + bias_term)
            prox(x, i, bs_indices, bs_indptr, d, step_size)

    idx = np.arange(n_samples)
    grad_tmp = np.zeros(n_features)
    success = False
    if callback is not None:
        callback(locals())
    for it in range(max_iter):
        x_snapshot = x.copy()
        gradient_average = full_grad(x_snapshot)
        np.random.shuffle(idx)
        _svrg_epoch(x, x_snapshot, idx, gradient_average, grad_tmp, step_size)
        if callback is not None:
            callback(locals())

        if np.abs(x - x_snapshot).sum() < tol:
            success = True
            break
    message = ""
    return optimize.OptimizeResult(x=x,
                                   success=success,
                                   nit=it,
                                   message=message)
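
# A minimal usage sketch for minimize_svrg (illustrative, not from the source).
# The squared-loss derivative below is a hypothetical helper; note that f_deriv is
# called from jitted code, so in practice it should be utils.njit-compatible.
import numpy as np
from scipy import sparse


def squared_loss_deriv(p, b_i):
    # derivative of 0.5 * (p - b_i)**2 with respect to the linear prediction p
    return p - b_i


# A = sparse.random(100, 10, density=0.3, format="csr", random_state=0)
# b = np.random.randn(100)
# res = minimize_svrg(squared_loss_deriv, A, b, x0=np.zeros(10), step_size=0.01)
# print(res.x, res.success, res.nit)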
Example #26
0
def minimize_pgd(closure,
                 x0,
                 prox,
                 step='backtracking',
                 max_iter=200,
                 max_iter_backtracking=1000,
                 backtracking_factor=.6,
                 tol=1e-8,
                 *prox_args,
                 callback=None):
    """
    Performs projected gradient descent on a batch of objectives of the form
      f(x) + g(x).
    We assume access to the gradient of f through closure,
    and to the proximal operator of g through prox.

    Args:
      closure: callable

      x0: torch.Tensor of shape (batch_size, *).

      prox: callable
        proximal operator of g

      step: 'backtracking' or float or torch.tensor of shape (batch_size,) or None.
        step size to be used. If None, will be estimated at the beginning
        using line search.
        If 'backtracking', will be estimated at each step using backtracking line search.

      max_iter: int
        number of iterations to perform.

      max_iter_backtracking: int
        max number of iterations in the backtracking line search

      backtracking_factor: float
        factor by which to multiply the step sizes during line search

      tol: float
        stops the algorithm when the certificate is smaller than tol
        for all datapoints in the batch

      prox_args: tuple
        (optional) additional args for prox

      callback: callable
        (optional) Any callable called on locals() at the end of each iteration.
        Often used for logging.
    """
    x = x0.detach().clone()
    batch_size = x.size(0)

    if step is None:
        # estimate lipschitz constant
        L = utils.init_lipschitz(closure, x0)
        step_size = 1. / L

    elif step == 'backtracking':
        L = 1.8 * utils.init_lipschitz(closure, x0)
        step_size = 1. / L

    elif isinstance(step, float):
        step_size = step * torch.ones(batch_size, device=x.device)

    elif torch.is_tensor(step):
        # per-sample step sizes of shape (batch_size,), as documented above
        step_size = step.clone().to(x.device)

    else:
        raise ValueError("step must be a float, a tensor, 'backtracking' or None")

    for it in range(max_iter):
        x.requires_grad = True

        fval, grad = closure(x)

        x_next = prox(x - utils.bmul(step_size, grad), step_size, *prox_args)
        update_direction = x_next - x

        if step == 'backtracking':
            step_size *= 1.1
            mask = torch.ones(batch_size, dtype=bool, device=x.device)

            with torch.no_grad():
                for _ in range(max_iter_backtracking):
                    f_next = closure(x_next, return_jac=False)
                    rhs = (fval + utils.bdot(grad, update_direction) +
                           utils.bmul(
                               utils.bdot(update_direction, update_direction),
                               1. / (2. * step_size)))
                    mask = f_next > rhs

                    if not mask.any():
                        break

                    step_size[mask] *= backtracking_factor
                    x_next = prox(x - utils.bmul(step_size, grad),
                                  step_size[mask], *prox_args)
                    update_direction[mask] = x_next[mask] - x[mask]
                else:
                    warnings.warn("Maximum number of line-search iterations "
                                  "reached.")

        with torch.no_grad():
            cert = torch.norm(utils.bmul(update_direction, 1. / step_size),
                              dim=-1)
            x.copy_(x_next)
            if (cert < tol).all():
                break

        if callback is not None:
            if callback(locals()) is False:
                break

    fval, grad = closure(x)
    return optimize.OptimizeResult(x=x,
                                   nit=it,
                                   fval=fval,
                                   grad=grad,
                                   certificate=cert)
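
# A minimal usage sketch for minimize_pgd (illustrative, not from the source).
# The batched quadratic closure and the L1 soft-thresholding prox below are
# hypothetical helpers; closure must also accept return_jac=False, as used by the
# backtracking line search above.
import torch


def quad_closure(x, return_jac=True):
    # per-sample objective f(x) = 0.5 * ||x - 1||^2 over the last dimension
    fval = 0.5 * ((x - 1.0) ** 2).sum(dim=-1)
    if not return_jac:
        return fval
    return fval, x - 1.0


def l1_prox(y, step_size, lam=0.1):
    # batched soft thresholding: prox of lam * ||.||_1; step_size may be a scalar
    # or a per-sample tensor of shape (batch_size,)
    thresh = lam * step_size
    if torch.is_tensor(thresh) and thresh.dim() == 1:
        thresh = thresh.unsqueeze(-1)
    return torch.sign(y) * torch.clamp(y.abs() - thresh, min=0.0)


# x0 = torch.zeros(4, 10)
# res = minimize_pgd(quad_closure, x0, l1_prox, step='backtracking')
# print(res.x.shape, res.certificate)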
Example #27
0
def fmin_CondatVu(fun, fun_deriv, g_prox, h_prox, L, x0, alpha=1.0, beta=1.0, tol=1e-12,
                  max_iter=10000, verbose=0, callback=None, step_size_x=1e-3,
                  step_size_y=1e3, max_iter_ls=20, g_prox_args=(), h_prox_args=()):
    """Condat-Vu primal-dual splitting method.

    This method solves optimization problems of the form

            minimize_x f(x) + alpha * g(x) + beta * h(L x)

    where f is a smooth function and g and h are (possibly non-smooth)
    functions for which the proximal operators are known.

    Parameters
    ----------
    fun : callable
        f(x) returns the value of f at x.

    fun_deriv : callable
        f_prime(x) returns the gradient of f.

    g_prox : callable of the form g_prox(x, alpha)
        g_prox(x, alpha) returns the proximal operator of g at x
        with parameter alpha.

    x0 : array-like
        Initial guess

    L : ndarray or sparse matrix
        Linear operator inside the h term.

    max_iter : int
        Maximum number of iterations.

    verbose : int
        Verbosity level, from 0 (no output) to 2 (output on each iteration)

    callback : callable
        callback function (optional).

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a
        ``scipy.optimize.OptimizeResult`` object. Important attributes are:
        ``x`` the solution array, ``success`` a Boolean flag indicating if
        the optimizer exited successfully and ``message`` which describes
        the cause of the termination. See `scipy.optimize.OptimizeResult`
        for a description of other attributes.

    References
    ----------
    Condat, Laurent. "A primal-dual splitting method for convex optimization
    involving Lipschitzian, proximable and linear composite terms." Journal of
    Optimization Theory and Applications (2013).

    Chambolle, Antonin, and Thomas Pock. "On the ergodic convergence rates of a
    first-order primal-dual algorithm." Mathematical Programming (2015)
    """
    xk = np.array(x0, copy=True)
    yk = L.dot(xk)
    success = False
    if not max_iter_ls > 0:
        raise ValueError('Line search iterations need to be greater than 0')

    if g_prox is None:
        def g_prox(step_size, x, *args): return x
    if h_prox is None:
        def h_prox(step_size, x, *args): return x

    # conjugate of h_prox
    def h_prox_conj(step_size, x, *args):
        return x - step_size * h_prox(beta / step_size, x / step_size, *args)
    it = 1
    # .. main iteration ..
    while it < max_iter:

        grad_fk = fun_deriv(xk)
        x_next = g_prox(step_size_x * alpha,
                        xk - step_size_x * grad_fk - step_size_x * L.T.dot(yk),
                        *g_prox_args)
        y_next = h_prox_conj(step_size_y, yk + step_size_y * L.dot(2 * x_next - xk),
                             *h_prox_args)

        incr = linalg.norm(x_next - xk) ** 2 + linalg.norm(y_next - yk) ** 2
        yk = y_next
        xk = x_next

        if verbose > 0:
            print("Iteration %s, increment: %s" % (it, incr))

        if callback is not None:
            callback(xk)

        if incr < tol:
            if verbose:
                print("Achieved relative tolerance at iteration %s" % it)
            success = True
            break

        it += 1

    if it >= max_iter:
        warnings.warn(
            "fmin_CondatVu did not reach the desired tolerance level", RuntimeWarning)

    return optimize.OptimizeResult(
        x=xk, success=success, nit=it)
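
# A minimal usage sketch for fmin_CondatVu (illustrative, not from the source).
# The quadratic objective, its gradient, the soft-thresholding prox and the identity
# operator L below are hypothetical helpers; note the prox signature prox(step_size, x)
# expected by the implementation above.
import numpy as np
from scipy import sparse


def quad_fun(x):
    return 0.5 * np.sum((x - 1.0) ** 2)


def quad_grad(x):
    return x - 1.0


def soft_threshold(step_size, x):
    # proximal operator of step_size * ||.||_1
    return np.sign(x) * np.maximum(np.abs(x) - step_size, 0.0)


# n = 20
# L_op = sparse.eye(n, format="csr")   # any linear operator with .dot and .T works
# res = fmin_CondatVu(quad_fun, quad_grad, soft_threshold, soft_threshold, L_op,
#                     np.zeros(n))
# print(res.x, res.success, res.nit)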
Example #28
0
def minimize_saga(
    f_deriv,
    A,
    b,
    x0,
    step_size,
    prox=None,
    alpha=0,
    max_iter=500,
    tol=1e-6,
    verbose=1,
    callback=None,
):
    r"""Stochastic average gradient augmented (SAGA) algorithm.

    This algorithm can solve linearly-parametrized loss functions of the form

        minimize_x \sum_{i}^n_samples f(A_i^T x, b_i) + alpha ||x||_2^2 + g(x)

    where g is a function for which we have access to its proximal operator.

    .. warning::
        This function is experimental, API is likely to change.


    Args:
      f_deriv
          derivative of the loss function f

      x0: np.ndarray or None, optional
          Starting point for optimization.

      step_size: float or None, optional
          Step size for the optimization. If None is given, this will be
          estimated from the function f.

      max_iter: int
          Maximum number of passes through the data in the optimization.

      tol: float
          Tolerance criterion. The algorithm will stop whenever the norm of the
          gradient mapping (generalization of the gradient for nonsmooth
          optimization) is below tol.

      verbose: bool
          Verbosity level. True might print some messages.

      trace: bool
          Whether to trace convergence of the function, useful for plotting
          and/or debugging. If yes, the result will have extra members
          trace_func and trace_time.


    Returns:
      opt: OptimizeResult
          The optimization result represented as a
          ``scipy.optimize.OptimizeResult`` object. Important attributes are:
          ``x`` the solution array, ``success`` a Boolean flag indicating if
          the optimizer exited successfully and ``message`` which describes
          the cause of the termination. See `scipy.optimize.OptimizeResult`
          for a description of other attributes.


    References:
      This variant of the SAGA algorithm is described in:

      `"Breaking the Nonsmooth Barrier: A Scalable Parallel Method for Composite
      Optimization."
      <https://arxiv.org/pdf/1707.06468.pdf>`_, Fabian Pedregosa, Remi Leblond,
      and Simon Lacoste-Julien. Advances in Neural Information Processing Systems
      (NIPS) 2017.
    """
    # convert any input to CSR sparse matrix representation. In the future we
    # might want to implement also a version for dense data (numpy arrays) to
    # better exploit data locality
    x = np.ascontiguousarray(x0).copy()
    n_samples, n_features = A.shape
    A = sparse.csr_matrix(A)

    if step_size is None:
        # automatic step-size selection (line search) is not implemented
        raise ValueError("step_size must be given explicitly")

    if hasattr(prox, "__len__") and len(prox) == 2:
        blocks = prox[1]
        prox = prox[0]
    else:
        blocks = sparse.eye(n_features, n_features, format="csr")

    if prox is None:

        @utils.njit
        def prox(x, i, indices, indptr, d, step_size):
            pass

    A_data = A.data
    A_indices = A.indices
    A_indptr = A.indptr
    n_samples, n_features = A.shape

    rblocks_indices = blocks.T.tocsr().indices
    blocks_indptr = blocks.indptr
    bs_data, bs_indices, bs_indptr = _support_matrix(A_indices, A_indptr,
                                                     rblocks_indices,
                                                     blocks.shape[0])
    csr_blocks_1 = sparse.csr_matrix((bs_data, bs_indices, bs_indptr))

    # .. diagonal reweighting ..
    d = np.array(csr_blocks_1.sum(0), dtype=float).ravel()
    idx = d != 0
    d[idx] = n_samples / d[idx]
    d[~idx] = 1

    @utils.njit(nogil=True)
    def _saga_epoch(x, idx, memory_gradient, gradient_average, grad_tmp,
                    step_size):
        # .. inner iteration of the SAGA algorithm..
        for i in idx:

            # .. gradient estimate ..
            p = 0.0
            for j in range(A_indptr[i], A_indptr[i + 1]):
                j_idx = A_indices[j]
                p += x[j_idx] * A_data[j]
            grad_i = f_deriv(p, b[i])
            for j in range(A_indptr[i], A_indptr[i + 1]):
                j_idx = A_indices[j]
                grad_tmp[j_idx] = (grad_i - memory_gradient[i]) * A_data[j]

            # .. update coefficients ..
            # .. first iterate on blocks ..
            for h_j in range(bs_indptr[i], bs_indptr[i + 1]):
                h = bs_indices[h_j]
                # .. then iterate on features inside block ..
                for b_j in range(blocks_indptr[h], blocks_indptr[h + 1]):
                    bias_term = d[h] * (gradient_average[b_j] + alpha * x[b_j])
                    x[b_j] -= step_size * (grad_tmp[b_j] + bias_term)
            prox(x, i, bs_indices, bs_indptr, d, step_size)

            # .. update memory terms ..
            for j in range(A_indptr[i], A_indptr[i + 1]):
                j_idx = A_indices[j]
                tmp = (grad_i - memory_gradient[i]) * A_data[j]
                tmp /= n_samples
                gradient_average[j_idx] += tmp
                grad_tmp[j_idx] = 0
            memory_gradient[i] = grad_i

    # .. initialize memory terms ..
    memory_gradient = np.zeros(n_samples)
    gradient_average = np.zeros(n_features)
    grad_tmp = np.zeros(n_features)
    idx = np.arange(n_samples)
    success = False
    if callback is not None:
        callback(locals())
    for it in range(max_iter):
        x_old = x.copy()
        np.random.shuffle(idx)
        _saga_epoch(x, idx, memory_gradient, gradient_average, grad_tmp,
                    step_size)
        if callback is not None:
            callback(locals())

        diff_norm = np.abs(x - x_old).sum()
        if diff_norm < tol:
            success = True
            break
    return optimize.OptimizeResult(x=x, success=success, nit=it)
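
# A minimal usage sketch for minimize_saga (illustrative, not from the source).
# The logistic-loss derivative below is a hypothetical helper; as with minimize_svrg,
# it is called from jitted code, so in practice it should be utils.njit-compatible.
import numpy as np
from scipy import sparse


def logistic_loss_deriv(p, b_i):
    # derivative of log(1 + exp(-b_i * p)) with respect to the linear prediction p
    return -b_i / (1.0 + np.exp(b_i * p))


# A = sparse.random(200, 20, density=0.2, format="csr", random_state=0)
# b = np.sign(np.random.randn(200))
# res = minimize_saga(logistic_loss_deriv, A, b, x0=np.zeros(20), step_size=1e-2)
# print(res.x, res.success, res.nit)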
Example #29
0
def noop_min(fun, x0, args, **options):
    return op.OptimizeResult(x=x0, fun=fun(x0), success=True, nfev=1)
Example #30
0
def load_mcmc_solution(h5_file, job_timestamp=None, logger=None):
    """Load Monte Carlo Markov Chain solution from HDF5 file.

    :param h5_file: File from which to load solution
    :type h5_file: str or pathlib.Path
    :param job_timestamp: Timestamp of job whose solution is to be loaded
    :type job_timestamp: str or NoneType
    :param logger: Output logging instance
    :type logger: logging.Logger
    :return: (solution, job configuration), job timestamp
    :rtype: (solution, dict), str
    """
    assert isinstance(job_timestamp, (str, type(None)))

    # TODO: migrate this to member of a new class for encapsulating an MCMC solution

    def read_data_empty(dataset):
        """
        Read dataset that might be empty. If empty, return None.
        See also function `write_data_empty`.

        :param dataset: The h5py.Dataset node to read.
        :type dataset: h5py.Dataset
        :return: Dataset value or None
        :rtype: numpy.array or NoneType
        """
        if not dataset.shape:
            value = None
        else:
            value = dataset.value
        # end if
        return value

    # end func

    def read_list_dataset(source_node):
        """Read list from a datase node containing ordered collection of items.
        See also function `write_list_dataset`.
        """
        list_data = []
        for idx, ds in source_node.items():
            list_data.append((int(idx), ds.value))
        # end for
        # Sort clusters by idx, then throw away the idx values.
        list_data.sort(key=lambda i: i[0])
        return [d[1] for d in list_data]

    # end func

    soln_configs = []
    with h5py.File(h5_file, 'r') as h5f:
        while job_timestamp is None:
            timestamps = list(h5f.keys())
            if len(timestamps) > 1:
                for i, ts in enumerate(timestamps):
                    job_node = h5f[ts]
                    job_tracking = json.loads(job_node.attrs['job_tracking']) \
                        if 'job_tracking' in job_node.attrs else ''
                    if job_tracking:
                        job_tracking = '(' + ', '.join([
                            ': '.join([k, str(v)])
                            for k, v in job_tracking.items()
                        ]) + ')'
                    # end if
                    print('[{}]'.format(i), ts, job_tracking)
                # end for
                index = input('Choose dataset number to load: ')
                if index.isdigit() and (0 <= int(index) < len(timestamps)):
                    index = int(index)
                # end if
            else:
                index = 0
            # end if
            job_timestamp = timestamps[index] if isinstance(index,
                                                            int) else None
        # end while

        job_root = h5f[job_timestamp]
        # source_data_file = job_root.attrs['input_file']
        for station_id, station_node in job_root.items():
            if logger:
                logger.info('Loading {}'.format(station_id.replace('_', '.')))
            # end if
            job_config = json.loads(station_node.attrs['config'])
            format_version = station_node.attrs['format_version']
            job_config.update({'format_version': format_version})
            if logger:
                logger.info(
                    'H5 storage format version: {}'.format(format_version))
            # end if

            try:
                soln = optimize.OptimizeResult()
                soln.x = station_node['x'].value
                soln.num_input_seismograms = station_node[
                    'num_input_seismograms'].value

                cluster_node = station_node['clusters']
                soln.clusters = read_list_dataset(cluster_node)

                assert len(soln.x) == len(soln.clusters)

                cluster_energy_node = station_node['cluster_energy']
                soln.cluster_funvals = read_list_dataset(cluster_energy_node)

                per_event_energy_node = station_node['per_event_energy']
                soln.esu = read_list_dataset(per_event_energy_node)

                # Subsurface seismograms
                subsurface_node = station_node['subsurface']
                subsurface = {}
                for layer_name, layer_node in subsurface_node.items():
                    subsurface[layer_name] = read_list_dataset(layer_node)
                # end for
                soln.subsurface = subsurface

                soln.bins = station_node['bins'].value
                soln.distribution = station_node['distribution'].value
                soln.acceptance_rate = station_node['acceptance_rate'].value
                soln.success = bool(station_node['success'].value)
                soln.status = int(station_node['status'].value)
                soln.message = station_node['message'].value
                soln.fun = station_node['fun'].value
                soln.jac = read_data_empty(station_node['jac'])
                soln.nfev = int(station_node['nfev'].value)
                soln.njev = int(station_node['njev'].value)
                soln.nit = int(station_node['nit'].value)
                soln.maxcv = read_data_empty(station_node['maxcv'])
                soln.samples = read_data_empty(station_node['samples'])
                soln.sample_funvals = read_data_empty(
                    station_node['sample_energies'])
                bounds = station_node['bounds'].value
                soln.bounds = optimize.Bounds(bounds[0], bounds[1])
                soln.version = station_node['version'].value
                if 'rnd_seed' in station_node:
                    soln.rnd_seed = int(station_node['rnd_seed'].value)
                else:
                    soln.rnd_seed = None
                # end if

                soln_configs.append((soln, job_config))
            except TypeError as exc:
                if logger:
                    logger.error(
                        'Error loading station {} solution'.format(station_id))
                    logger.error(repr(exc))
            # end try
        # end for
    # end with

    return soln_configs, job_timestamp
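
# A minimal usage sketch for load_mcmc_solution (illustrative, not from the source);
# the HDF5 file name below is hypothetical.
# import logging
# logger = logging.getLogger(__name__)
# solutions, timestamp = load_mcmc_solution('mcmc_solution.h5', logger=logger)
# for soln, job_config in solutions:
#     print(timestamp, soln.x.shape, soln.fun, soln.success)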