def rms_height_from_area(topography):
    """
    Return the areal root mean square height (the Sq value) of a
    two-dimensional topography stored on a uniform grid.

    The mean height is removed before the squared heights are averaged.
    Both sums run through a `Reduction` built from the topography's
    communicator and are normalised by the product of `nb_grid_pts`.

    Parameters
    ----------
    topography : :obj:`SurfaceTopography`
        Two-dimensional topography object containing height information.

    Returns
    -------
    rms_height : float
        Root mean square height value.

    Raises
    ------
    ValueError
        If `topography.dim` is not 2. Line scans (`dim <= 1`) get a
        dedicated error message.
    """
    dim = topography.dim

    # Guard clauses: only genuine two-dimensional maps are accepted.
    if dim <= 1:
        raise ValueError(
            'Areal rms height can only be computed for topographies, not line scans.'
        )
    if dim != 2:
        raise ValueError(
            f'Cannot handle topographies of dimension {dim}')

    nb_pts = np.prod(topography.nb_grid_pts)
    reduction = Reduction(topography._communicator)
    heights = topography.heights()

    mean_height = reduction.sum(heights) / nb_pts
    sum_of_squares = reduction.sum((heights - mean_height)**2)
    return np.sqrt(sum_of_squares / nb_pts)
def test_fill_undefined_data_parallel(comm):
    """
    Check `fill_undefined_data` on a domain-decomposed topography.

    Every rank contributes a 3 x 1 strip of random heights (seeded with its
    rank) in which values above 0.9 are replaced by NaN; rank 0 always
    carries at least one NaN. After filling with minus infinity, the filled
    topography must report no undefined data, must agree with the original
    on all defined points and must equal the fill value on all previously
    undefined points.
    """
    np.random.seed(comm.rank)
    local_data = np.random.uniform(size=(3, 1))
    local_data[local_data > 0.9] = np.nan
    if comm.rank == 0:  # make sure we always have undefined data
        local_data[0, 0] = np.nan
    topography = Topography(local_data, (1., 1.),
                            info=dict(test=1),
                            communicator=comm,
                            decomposition="subdomain",
                            nb_grid_pts=(3, comm.size),
                            subdomain_locations=(0, comm.rank))

    # Use np.inf: the alias np.infty was removed in NumPy 2.0.
    fill_value = -np.inf

    filled_topography = topography.fill_undefined_data(fill_value=fill_value)
    assert topography.has_undefined_data
    assert not filled_topography.has_undefined_data

    mask = np.ma.getmask(topography.heights())
    nmask = np.logical_not(mask)

    reduction = Reduction(comm)

    # Defined points are untouched, undefined points carry the fill value.
    assert reduction.all(filled_topography[nmask] == topography[nmask])
    assert reduction.all(filled_topography[mask] == fill_value)
예제 #3
0
def test_analytical_min(comm):
    """
    Compare the L-BFGS result with the analytically known position of the
    minimum of the extended Rosenbrock function.

    The objective is wrapped in `MPI_Objective_Interface` with 50 grid
    points on communicator `comm`. The test passes when the optimizer
    reports success with the gradient-norm convergence message, the
    returned position matches `PObjective.xmin()` and the function value
    matches `Objective.minVal(n)` to within 1e-7.
    """
    n = 50

    Objective = mp.Extended_Rosenbrock

    PObjective = MPI_Objective_Interface(Objective,
                                         nb_domain_grid_pts=n,
                                         comm=comm)

    x0 = PObjective.startpoint()

    # ftol=0: only terminates if the gradient condition is satisfied
    res = LBFGS(PObjective.f_grad,
                x0,
                jac=True,
                maxcor=5,
                maxiter=100,
                gtol=1e-12,
                ftol=0,
                pnp=Reduction(comm))
    assert res.success
    assert res.message == "CONVERGENCE: NORM_OF_GRADIENT_<=_GTOL"
    np.testing.assert_allclose(res.x, PObjective.xmin(), atol=1e-16, rtol=1e-7)

    assert np.abs(res.fun - Objective.minVal(n)) < 1e-7
def rms_height_from_area(topography):
    """
    Return the root mean square height (the Sq value) of a topography or
    line scan stored on a uniform grid, evaluated over the whole data set.

    The mean height is removed before the squared heights are averaged.
    Both sums run through a `Reduction` built from the topography's
    communicator and are normalised by the product of `nb_grid_pts`.

    Parameters
    ----------
    topography : :obj:`SurfaceTopography` or :obj:`UniformLineScan`
        Object containing height information.

    Returns
    -------
    rms_height : float
        Root mean square height value.
    """
    nb_pts = np.prod(topography.nb_grid_pts)
    reduction = Reduction(topography._communicator)
    heights = topography.heights()

    mean_height = reduction.sum(heights) / nb_pts
    sum_of_squares = reduction.sum((heights - mean_height)**2)
    return np.sqrt(sum_of_squares / nb_pts)
def test_positions(comm):
    """
    Check the grid positions reported by a domain-decomposed topography.

    A flat topography is built on the subdomain layout of an MPI FFT
    engine. The local position arrays must have the subdomain shape, the
    global minimum of each coordinate must be 0 and the global maximum must
    be one grid spacing short of the physical size.
    """
    nprocs = comm.Get_size()
    nx, ny = 12 * nprocs, 10 * nprocs + 1
    sx, sy = 33., 54.
    fftengine = FFT((nx, ny), fft='mpi', communicator=comm)

    flat_heights = np.zeros(fftengine.nb_subdomain_grid_pts)
    surf = Topography(flat_heights,
                      physical_sizes=(sx, sy),
                      decomposition='subdomain',
                      nb_grid_pts=(nx, ny),
                      subdomain_locations=fftengine.subdomain_locations,
                      communicator=comm)

    x, y = surf.positions()
    local_shape = fftengine.nb_subdomain_grid_pts
    assert x.shape == local_shape
    assert y.shape == local_shape

    reduction = Reduction(comm)
    expected_x_max = sx * (1 - 1. / nx)
    expected_y_max = sy * (1 - 1. / ny)

    assert reduction.min(x) == 0
    assert abs(reduction.max(x) - expected_x_max) < 1e-8 * sx / nx, \
        "{}".format(x)
    assert reduction.min(y) == 0
    assert abs(reduction.max(y) - expected_y_max) < 1e-8
예제 #6
0
def rms_height(topography, kind='Sq'):
    """
    Compute the root mean square height amplitude of a topography or
    line scan stored on a uniform grid.

    Parameters
    ----------
    topography : :obj:`SurfaceTopography` or :obj:`UniformLineScan`
        SurfaceTopography object containing height information.
    kind : str, optional
        'Sq' subtracts the mean of the whole data set before averaging the
        squared heights. 'Rq' subtracts, for every position along the
        remaining axes, the mean taken along axis 0 instead.
        (Default: 'Sq')

    Returns
    -------
    rms_height : float
        Root mean square height value.

    Raises
    ------
    RuntimeError
        If `kind` is neither 'Sq' nor 'Rq'.
    """
    # Fail fast on an unknown kind, before any data is touched.
    if kind not in ('Sq', 'Rq'):
        raise RuntimeError("Unknown rms height kind '{}'.".format(kind))

    n = np.prod(topography.nb_grid_pts)
    pnp = Reduction(topography._communicator)
    profile = topography.heights()

    if kind == 'Sq':
        return np.sqrt(pnp.sum((profile - pnp.sum(profile) / n)**2) / n)

    # kind == 'Rq'
    # Problem: when one of the processors holds the full data it is not able
    # to detect whether any axis is MPI-parallelized. This problem is solved
    # automatically if we do not support one axis to be zero.
    decomp_axis = [
        full != loc
        for full, loc in zip(topography.nb_grid_pts, profile.shape)
    ]
    # The sum along axis 0 needs communication only if that axis is split
    # across processes; otherwise every process already holds full lines.
    line_pnp = pnp if decomp_axis[0] else np
    line_mean = line_pnp.sum(profile, axis=0) / topography.nb_grid_pts[0]
    # The total over all grid points must always be a global reduction:
    # the data may be decomposed along an axis other than 0, and `n` is the
    # global number of grid points.
    return np.sqrt(pnp.sum((profile - line_mean)**2) / n)
예제 #7
0
    def __init__(self, substrate, surface):
        """
        Represents a contact problem.

        Parameters
        ----------
        substrate : HalfSpace
            Defines the solid mechanics in the substrate. Its
            `area_per_pt`, `communicator` and
            `local_topography_subdomain_slices` attributes are read here.
        surface : SurfaceTopography
            Defines the profile.
        """
        self.substrate = substrate
        self.area_per_pt = self.substrate.area_per_pt
        self.surface = surface

        # Not known at construction time; initialised to None.
        self.dim = self.gap = self.disp = None

        # Reductions run over the communicator of the substrate.
        communicator = substrate.communicator
        self.reduction = Reduction(communicator)

        self.comp_slice = self.substrate.local_topography_subdomain_slices
예제 #8
0
def test_time_complexity(comm):
    """
    Time L-BFGS on the extended Rosenbrock objective for problem sizes from
    10 to 1e6 degrees of freedom and plot the scaling.

    Every run must report success. Afterwards the time per degree of
    freedom, the time per degree of freedom and iteration, and the number
    of iterations are plotted against the problem size; the plot window is
    shown in blocking mode.
    """
    maxcor = 5
    Objective = mp.Extended_Rosenbrock
    n = np.array([10, 100, 1000, 1e4, 1e5, 1e6], dtype=int)
    t = np.zeros(len(n), dtype=float)
    res = [None] * len(n)
    pnp = Reduction(comm)

    for i, nb_dof in enumerate(n):
        PObjective = MPI_Objective_Interface(Objective,
                                             nb_domain_grid_pts=nb_dof,
                                             comm=comm)
        x0 = PObjective.startpoint()

        res[i], t[i] = timer(LBFGS,
                             PObjective.f_grad,
                             x0,
                             jac=True,
                             maxcor=maxcor,
                             maxiter=100000,
                             gtol=(1e-5),
                             pnp=pnp)

        assert res[i].success

    import matplotlib.pyplot as plt

    nb_iterations = [r.nit for r in res]
    time_per_dof_and_it = [
        elapsed / dof / nit
        for elapsed, dof, nit in zip(t, n, nb_iterations)
    ]

    fig, ax = plt.subplots()
    ax.plot(n, t / n, '+-', label="time / DOF")
    ax.plot(n, time_per_dof_and_it, '+-',
            label="time per DOF per iteration")
    # Iteration counts go on a second y axis.
    ax2 = plt.twinx(ax)
    ax2.plot(n, nb_iterations, 'o', label="nit")
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_ylabel("t/n (s)")
    ax.set_xlabel("DOF")
    ax.legend()
    ax2.legend()
    plt.show(block=True)
예제 #9
0
    def __init__(self, nb_domain_grid_pts, pnp=Reduction()):
        """
        Not usable yet: the constructor raises `NotImplementedError`
        unconditionally.

        Everything after the `raise` is unreachable. It is a draft that
        splits `nb_domain_grid_pts` points into contiguous chunks of
        `nb_domain_grid_pts // nprocs` points per rank (the last rank also
        takes the remainder) and prepares slices selecting the local
        entries with odd or even index in the global array.

        Parameters
        ----------
        nb_domain_grid_pts : int
            Number of points of the global domain.
        pnp : Reduction
            Reduction object whose `comm` attribute supplies rank and size.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError("Need to implement communication")
        comm = pnp.comm
        nprocs = comm.Get_size()
        rank = comm.Get_rank()

        step = nb_domain_grid_pts // nprocs
        first = rank * step

        self.subdomain_locations = first
        if rank == nprocs - 1:
            # The last rank takes everything that is left.
            self.subdomain_slices = slice(first, None)
            self.nb_subdomain_grid_pts = nb_domain_grid_pts - first
        else:
            self.subdomain_slices = slice(first, (rank + 1) * step)
            self.nb_subdomain_grid_pts = step

        # helps to select the data that has odd or even index in the global
        # array
        self._sl_odd = slice(self.subdomain_locations % 2, None, 2)
        self._sl_even = slice((self.subdomain_locations + 1) % 2, None, 2)
        self.pnp = pnp
        self.nfeval = 0
        self.ngradeval = 0
예제 #10
0
    def __init__(self,
                 nb_domain_grid_pts,
                 pnp=Reduction(),
                 factors=None,
                 startpoint=None):
        """
        Split a one-dimensional domain across the ranks of `pnp.comm` and
        set up this rank's share of the data.

        Every rank owns `nb_domain_grid_pts // nprocs` consecutive points;
        the last rank additionally takes the remainder.

        Parameters
        ----------
        nb_domain_grid_pts : int
            Number of points of the global domain.
        pnp : Reduction
            Reduction object whose `comm` attribute supplies rank and size.
        factors : array_like, optional
            Global array, sliced down to this rank's subdomain. If None,
            uniform random values shifted by 0.1 are drawn for the local
            points.
        startpoint : array_like, optional
            Global array, sliced down to this rank's subdomain. If None,
            normally distributed random values are drawn for the local
            points.
        """
        comm = pnp.comm
        nprocs = comm.Get_size()
        rank = comm.Get_rank()

        step = nb_domain_grid_pts // nprocs
        first = rank * step

        self.subdomain_locations = first
        if rank == nprocs - 1:
            # The last rank takes everything that is left.
            self.subdomain_slices = slice(first, None)
            self.nb_subdomain_grid_pts = nb_domain_grid_pts - first
        else:
            self.subdomain_slices = slice(first, (rank + 1) * step)
            self.nb_subdomain_grid_pts = step

        self.pnp = pnp

        # Local data: slice the global arrays if given, else draw random
        # values (factors first, then the start point).
        if factors is None:
            self.factors = np.random.random(self.nb_subdomain_grid_pts) + 0.1
        else:
            self.factors = factors[self.subdomain_slices]

        if startpoint is None:
            self._startpoint = np.random.normal(
                size=self.nb_subdomain_grid_pts)
        else:
            self._startpoint = startpoint[self.subdomain_slices]

        self.nfeval = 0
        self.ngradeval = 0
def show_parallel_speedup():
    """
    Time L-BFGS on an `MPI_Quadratic` objective for several communicator
    sizes and plot the parallel speed-up.

    For every size in (4, 8, 10, 20) that does not exceed the size of
    `MPI.COMM_WORLD`, a communicator is selected (the world communicator,
    `MPI.COMM_SELF`, or a split of the world communicator), the
    minimisation is timed with `timer`, and the result is compared with
    `PObjective.xmin()`. Rank 0 of the world communicator writes the
    figure to "LBFGS_parallel_speedup.png".
    """
    world = MPI.COMM_WORLD
    msg = ""

    orsizes = np.array([4, 8, 10, 20])
    orsizes = orsizes[orsizes <= world.size]
    sizes = orsizes.copy()

    # Only rank 0 of the world communicator produces the figure.
    toPlot = world.Get_rank() == 0
    if toPlot:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        fig, (ax, ax2) = plt.subplots(2, 1, sharex=True)
        ax.set_xlabel("nprocs")
        ax.set_ylabel("t[1proc] / t")
        ax2.set_xlabel("nprocs")
        ax2.set_ylabel("t (s)")
        ax2.set_yscale('log')

        ax.plot(sizes, sizes, '--k', label="ideal")

    for n in [int(1e6)]:
        if len(sizes) == 0:
            continue
        t = np.zeros(len(sizes), dtype=float)
        res = [None] * len(sizes)

        maxcor = 5
        factors = 0.1 + np.random.random(n)
        startpoint = np.random.normal(size=n)
        for i, size in enumerate(sizes):
            # Pick the communicator on which this run is carried out.
            color = 0 if world.rank < size else 1
            if world.size == size:
                comm = world
            elif size == 1:
                comm = MPI.COMM_SELF
            else:
                comm = world.Split(color)

            pnp = Reduction(comm)
            PObjective = MPI_Quadratic(nb_domain_grid_pts=n,
                                       pnp=pnp,
                                       factors=factors,
                                       startpoint=startpoint)
            x0 = PObjective.startpoint()

            if world.Get_rank() == 0:
                print(" Before min n = {}".format(n))

            res[i], t[i] = timer(LBFGS,
                                 PObjective.f,
                                 x0,
                                 jac=PObjective.grad,
                                 maxcor=maxcor,
                                 maxiter=100000,
                                 gtol=(1e-5),
                                 store_iterates=None,
                                 pnp=pnp)
            msg += "size {}:\n".format(size)

            assert res[i].success, "Minimization faild"
            # Final distance to the minimum, relative to the initial one.
            assert (
                pnp.max(abs(np.reshape(res[i].x, (-1,))
                            - np.reshape(PObjective.xmin(), (-1,))))
                / pnp.max(abs(PObjective.startpoint() - PObjective.xmin()))
                < 1e-5
            )
            if world.Get_rank() == 0:
                print("n = {}, size = {}".format(n, size))

        if toPlot:
            curve_label = "n = {}".format(n)
            speedup = float(sizes[0] * t[0]) / t
            ax.plot(sizes, speedup, '-o', label=curve_label)
            ax2.plot(sizes, t, '-o', label=curve_label)
            fig.savefig("LBFGS_parallel_speedup.png")

    if toPlot:
        ax.legend()
        fig.savefig("LBFGS_parallel_speedup.png")
def pnp(comm):
    """Return a `Reduction` object operating on the communicator `comm`."""
    reduction = Reduction(comm)
    return reduction
예제 #13
0
def constrained_conjugate_gradients(substrate, topography, hardness=None,
                                    external_force=None, offset=None,
                                    initial_displacements=None,
                                    initial_forces=None,
                                    pentol=None, prestol=1e-5,
                                    mixfac=0.1,
                                    maxiter=100000,
                                    logger=None,
                                    callback=None,
                                    verbose=False):
    """
    Use a constrained conjugate gradient optimization to find the equilibrium
    configuration deflection of an elastic manifold. The conjugate gradient
    iteration is reset using the steepest descent direction whenever the
    contact area changes.
    Method is described in I.A. Polonsky, L.M. Keer, Wear 231, 206 (1999)

    Parameters
    ----------
    substrate : elastic manifold
        Elastic manifold.
    topography : SurfaceTopography object or array_like
        Height profile of the rigid counterbody. A plain array is wrapped
        into a `Topography`; when the substrate is domain-decomposed a
        topography object is required.
    hardness : array_like, optional
        Hardness of the substrate. Pressure cannot exceed this value. Can be
        scalar or array (i.e. per pixel) value.
    external_force : float, optional
        External force. Constrains the sum of forces to this value.
    offset : float, optional
        Offset of rigid surface. Ignored if external_force is specified.
        None is treated as 0.
    initial_displacements : array_like, optional
        Displacement field for initializing the solver. Zero displacements
        are used if set to None.
    initial_forces : array_like, optional
        Pixel forces field for initializing the solver. Is computed from
        the initial displacements if None.
    pentol : float, optional
        Maximum penetration of contacting regions required for convergence.
        If None, a heuristic based on the rms height of the topography is
        used.
    prestol : float
        Maximum pressure outside the contact region allowed for convergence.
    mixfac : float
        Mixing factor of the pressure relaxation that is used when a
        hardness is given and the area treated by the CG optimizer
        vanishes. It is halved after every relaxation step.
    maxiter : int
        Maximum number of iterations.
    logger : ContactMechanics.Tools.Logger, optional
        Reports status and values at each iteration.
    callback : callable(int iteration, array_like forces, dict d), optional
        Called each iteration. The dictionary contains additional scalars.
    verbose : bool
        If True, more scalar quantities are passed to the logger.

    Returns
    -------
    Optimisation result
        x: displacements
        fun: elastic energy
        jac: forces
        active_set: points where forces are not constrained to 0 or hardness
        offset: offset of the rigid surface, results from the optimization
           process when the external_force is constrained

    Raises
    ------
    ValueError
        If the substrate is domain-decomposed and `topography` is not a
        topography object.
    Exception
        If the substrate is neither one- nor two-dimensional.
    RuntimeError
        If a nan is encountered during the iteration.
    """

    if substrate.nb_subdomain_grid_pts != substrate.nb_domain_grid_pts:
        # check that a topography instance is provided and not only a numpy
        # array
        if not hasattr(topography, "nb_grid_pts"):
            raise ValueError("You should provide a topography object when "
                             "working with MPI")

    reduction = Reduction(substrate.communicator)

    # surface is the array holding the data assigned to the processsor
    if not hasattr(topography, "nb_grid_pts"):
        surface = topography
        topography = Topography(surface,
                                physical_sizes=substrate.physical_sizes)
    else:
        surface = topography.heights()  # Local data

    # Resolve the default offset first: the tolerance heuristic below does
    # arithmetic with it and would fail on None.
    if offset is None:
        offset = 0

    # Note: Suffix _r deontes real-space _q reciprocal space 2d-arrays

    nb_surface_pts = np.prod(topography.nb_grid_pts)
    if pentol is None:
        # Heuristics for the possible tolerance on penetration.
        # This is necessary because numbers can vary greatly
        # depending on the system of units.
        pentol = topography.rms_height_from_area() / (
                10 * np.mean(topography.nb_grid_pts))
        # If pentol is zero, then this is a flat surface. This only makes
        # sense for nonperiodic calculations, i.e. it is a punch. Then
        # use the offset to determine the tolerance
        if pentol == 0:
            pentol = (offset + reduction.sum(surface[...]) / nb_surface_pts) \
                     / 1000
        # If we are still zero use an arbitrary value
        if pentol == 0:
            pentol = 1e-3

    if logger is not None:
        logger.pr('maxiter = {0}'.format(maxiter))
        logger.pr('pentol = {0}'.format(pentol))

    if initial_displacements is None:
        u_r = np.zeros(substrate.nb_subdomain_grid_pts)
    else:
        u_r = initial_displacements.copy()

    # slice of the local data of the computation subdomain corresponding to the
    # topography subdomain. It's typically the first half of the computation
    # subdomain (along the non-parallelized dimension) for FreeFFTElHS
    # It's the same for PeriodicFFTElHS
    comp_slice = [slice(0, max(0, min(
        substrate.nb_grid_pts[i] - substrate.subdomain_locations[i],
        substrate.nb_subdomain_grid_pts[i])))
                  for i in range(substrate.dim)]
    if substrate.dim not in (1, 2):
        raise Exception(
            ("Constrained conjugate gradient currently only implemented for 1 "
             "or 2 dimensions (Your substrate has {}.).").format(
                substrate.dim))

    comp_mask = np.zeros(substrate.nb_subdomain_grid_pts, dtype=bool)
    comp_mask[tuple(comp_slice)] = True

    # TODO: Test behaviour with masked arrays.
    surf_mask = np.ma.getmask(surface)
    if surf_mask is np.ma.nomask:
        surf_mask = np.ones(topography.nb_subdomain_grid_pts, dtype=bool)
    else:
        # Exclude undefined topography points from the computational region
        # and turn the mask into a mask of the defined points.
        comp_mask[tuple(comp_slice)][surf_mask] = False
        surf_mask = np.logical_not(surf_mask)
    pad_mask = np.logical_not(comp_mask)
    N_pad = reduction.sum(pad_mask * 1)
    # Number of defined topography points; constant during the iteration,
    # hence reduced once here instead of in every step.
    nb_defined_pts = reduction.sum(surf_mask * 1)
    u_r[comp_mask] = np.where(u_r[comp_mask] < surface[surf_mask] + offset,
                              surface[surf_mask] + offset,
                              u_r[comp_mask])

    result = optim.OptimizeResult()
    result.nfev = 0
    result.nit = 0
    result.success = False
    result.message = "Not Converged (yet)"

    # Compute forces
    # p_r = -np.fft.ifft2(np.fft.fft2(u_r)/gf_q).real
    if initial_forces is None:
        p_r = substrate.evaluate_force(u_r)
    else:
        p_r = initial_forces.copy()
        u_r = substrate.evaluate_disp(p_r)

    result.nfev += 1
    # Pressure outside the computational region must be zero
    p_r[pad_mask] = 0.0

    # iteration
    delta = 0
    delta_str = 'reset'
    G_old = 1.0
    t_r = np.zeros_like(u_r)

    tau = 0.0
    for it in range(1, maxiter + 1):
        result.nit = it

        # Reset contact area (area that feels compressive stress)
        c_r = p_r < 0.0
        # TODO: maybe np.where(self.interaction.force < 0., 1., 0.)

        # Compute total contact area (area with compressive pressure)
        A_contact = reduction.sum(c_r * 1)

        # If a hardness is specified, exclude values that exceed the hardness
        # from the "contact area". Note: "contact area" here is the region that
        # is optimized by the CG iteration.
        if hardness is not None:
            c_r = np.logical_and(c_r, p_r > -hardness)

        # Compute total are treated by the CG optimizer (which exclude flowing)
        # portions.
        A_cg = reduction.sum(c_r * 1)

        # Compute gap
        g_r = u_r[comp_mask] - surface[surf_mask]
        if external_force is not None:
            offset = 0
            if A_cg > 0:
                offset = reduction.sum(g_r[c_r[comp_mask]]) / A_cg
        g_r -= offset

        # Compute G = sum(g*g) (over contact area only)
        G = reduction.sum(c_r[comp_mask] * g_r * g_r)

        if delta_str != 'mix' and not (hardness is not None and A_cg == 0):
            # t = (g + delta*(G/G_old)*t) inside contact area and 0 outside
            if delta > 0 and G_old > 0:
                t_r[comp_mask] = c_r[comp_mask] * (
                        g_r + delta * (G / G_old) * t_r[comp_mask])
            else:
                t_r[comp_mask] = c_r[comp_mask] * g_r

            # Compute elastic displacement that belong to t_r
            # substrate (Nelastic manifold: r_r is negative of Polonsky,
            # Kerr's r)
            # r_r = -np.fft.ifft2(gf_q*np.fft.fft2(t_r)).real
            r_r = substrate.evaluate_disp(t_r)
            result.nfev += 1
            # Note: Sign reversed from Polonsky, Keer because this r_r is
            # negative of theirs.
            tau = 0.0
            if A_cg > 0:
                # tau = -sum(g*t)/sum(r*t) where sum is only over contact
                # region
                x = -reduction.sum(c_r * r_r * t_r)
                if x > 0.0:
                    tau = \
                        reduction.sum(c_r[comp_mask] * g_r * t_r[comp_mask]) \
                        / x
                else:
                    G = 0.0

            p_r += tau * c_r * t_r
        else:
            # The CG area can vanish if this is a plastic calculation. In that
            # case we need to use the gap to decide which regions contact. All
            # contact area should then be the hardness value. We use simple
            # relaxation algorithm to converge the contact area in that case.

            if delta_str != 'mixconv':
                delta_str = 'mix'

            # Mix pressure
            # p_r[comp_mask] = (1-mixfac)*p_r[comp_mask] + \
            #                 mixfac*np.where(g_r < 0.0,
            #                                 -hardness*np.ones_like(g_r),
            #                                 np.zeros_like(g_r))
            # Evolve pressure in direction of energy gradient
            # p_r[comp_mask] += mixfac*(u_r[comp_mask] + g_r)
            p_r[comp_mask] = (1 - mixfac) * p_r[
                comp_mask] - mixfac * hardness * (g_r < 0.0)
            mixfac *= 0.5
            # p_r[comp_mask] = -hardness*(g_r < 0.0)

        # Find area with tensile stress and negative gap
        # (i.e. penetration of the two surfaces)
        mask_tensile = p_r >= 0.0
        nc_r = np.logical_and(mask_tensile[comp_mask], g_r < 0.0)
        # If hardness is specified, find area where pressure exceeds hardness
        # but gap is positive
        if hardness is not None:
            mask_flowing = p_r <= -hardness
            nc_r = np.logical_or(nc_r, np.logical_and(mask_flowing[comp_mask],
                                                      g_r > 0.0))

        # For nonperiodic calculations: Find maximum pressure in pad region.
        # This must be zero.
        pad_pres = 0
        if N_pad > 0:
            pad_pres = reduction.max(abs(p_r[pad_mask]))

        # Find maximum pressure outside contacting region and the deviation
        # from hardness inside the flowing regions. This should go to zero.
        max_pres = 0
        if reduction.sum(mask_tensile * 1) > 0:
            max_pres = reduction.max(p_r[mask_tensile] * 1)
        # `is not None` (not truthiness): hardness may be an array, and
        # mask_flowing is defined under exactly this condition above.
        if hardness is not None:
            A_fl = reduction.sum(mask_flowing)
            if A_fl > 0:
                max_pres = max(max_pres,
                               -reduction.min(p_r[mask_flowing] + hardness))

        # Set all tensile stresses to zero
        p_r[mask_tensile] = 0.0

        # Adjust pressure
        if external_force is not None:
            psum = -reduction.sum(p_r[comp_mask])
            if psum != 0:
                p_r *= external_force / psum
            else:
                p_r = -external_force / nb_surface_pts * np.ones_like(p_r)
                p_r[pad_mask] = 0.0

        # If hardness is specified, set all stress larger than hardness to the
        # hardness value (i.e. truncate pressure)
        if hardness is not None:
            p_r[mask_flowing] = -hardness

        if delta_str != 'mix':
            if reduction.sum(nc_r * 1) > 0:
                # The contact area has changed! nc_r contains area that
                # penetrate but have zero (or tensile) pressure. They hence
                # violate the contact constraint. Update their forces and
                # reset the CG iteration.
                p_r[comp_mask] += tau * nc_r * g_r
                delta = 0
                delta_str = 'sd'
            else:
                delta = 1
                delta_str = 'cg'

        # Check convergence respective pressure
        converged = True
        psum = -reduction.sum(p_r[comp_mask])
        if external_force is not None:
            converged = abs(psum - external_force) < prestol

        # Compute new displacements from updated forces
        # u_r = -np.fft.ifft2(gf_q*np.fft.fft2(p_r)).real
        new_u_r = substrate.evaluate_disp(p_r)
        maxdu = reduction.max(abs(new_u_r - u_r))
        u_r = new_u_r
        result.nfev += 1

        # Store G for next step
        G_old = G

        # Compute root-mean square penetration, max penetration and max force
        # difference between the steps
        if A_cg > 0:
            rms_pen = sqrt(G / A_cg)
        else:
            rms_pen = sqrt(G)
        max_pen = max(0.0,
                      reduction.max(c_r[comp_mask] * (surface[surf_mask] +
                                                      offset -
                                                      u_r[comp_mask])))
        result.maxcv = {"max_pen": max_pen,
                        "max_pres": max_pres}

        # Elastic energy would be
        # e_el = -0.5*reduction.sum(p_r*u_r)

        if delta_str == 'mix':
            converged = converged and maxdu < pentol and \
                        max_pres < prestol and pad_pres < prestol
        else:
            converged = converged and rms_pen < pentol and \
                        max_pen < pentol and maxdu < pentol and \
                        max_pres < prestol and pad_pres < prestol

        log_headers = ['status', 'it', 'area', 'frac. area', 'total force',
                       'offset']
        log_values = [delta_str, it, A_contact,
                      A_contact / nb_defined_pts, psum,
                      offset]

        if hardness is not None:
            log_headers += ['plast. area', 'frac.plast. area']
            log_values += [A_fl, A_fl / nb_defined_pts]
        if verbose:
            log_headers += ['rms pen.', 'max. pen.', 'max. force',
                            'max. pad force', 'max. du', 'CG area',
                            'frac. CG area', 'sum(nc_r)']
            log_values += [rms_pen, max_pen, max_pres, pad_pres, maxdu, A_cg,
                           A_cg / nb_defined_pts,
                           reduction.sum(nc_r * 1)]
            if delta_str == 'mix':
                log_headers += ['mixfac']
                log_values += [mixfac]
            else:
                log_headers += ['tau']
                log_values += [tau]

        if converged and delta_str == 'mix':
            delta_str = 'mixconv'
            log_values[0] = delta_str
            mixfac = 0.5
        elif converged:
            if logger is not None:
                log_values[0] = 'CONVERGED'
                logger.st(log_headers, log_values, force_print=True)
            # Return full u_r because this is required to reproduce pressure
            # from evalualte_force
            result.x = u_r  # [comp_mask]
            # Return partial p_r because pressure outside computational region
            # is zero anyway
            result.jac = -p_r[tuple(comp_slice)]
            result.active_set = c_r
            # Compute elastic energy
            result.fun = -reduction.sum(
                p_r[tuple(comp_slice)] * u_r[tuple(comp_slice)]) / 2
            result.offset = offset
            result.success = True
            result.message = "Polonsky converged"
            return result

        if logger is not None and it < maxiter:
            logger.st(log_headers, log_values)
        if callback is not None:
            d = dict(area=np.int64(A_contact).item(),
                     fractional_area=np.float64(
                         A_contact / nb_defined_pts).item(),
                     rms_penetration=np.float64(rms_pen).item(),
                     max_penetration=np.float64(max_pen).item(),
                     max_pressure=np.float64(max_pres).item(),
                     pad_pressure=np.float64(pad_pres).item(),
                     penetration_tol=np.float64(pentol).item(),
                     pressure_tol=np.float64(prestol).item())
            callback(it, p_r, d)

        if isnan(G) or isnan(rms_pen):
            raise RuntimeError('nan encountered.')

    if logger is not None:
        log_values[0] = 'NOT CONVERGED'
        logger.st(log_headers, log_values, force_print=True)

    # Return full u_r because this is required to reproduce pressure
    # from evalualte_force
    result.x = u_r  # [comp_mask]
    # Return partial p_r because pressure outside computational region
    # is zero anyway
    result.jac = -p_r[tuple(comp_slice)]
    result.active_set = c_r
    # Compute elastic energy
    result.fun = -reduction.sum(
        (p_r[tuple(comp_slice)] * u_r[tuple(comp_slice)])) / 2
    result.offset = offset
    result.message = "Reached maxiter = {}".format(maxiter)
    return result
def test_a_clean_way(comm):
    """Smoke test: run an `all` reduction of a plain `True` on `comm`."""
    reduction = Reduction(comm)
    reduction.all(True)
def test_z_continue_another_test(comm):
    """Smoke test: run a `sum` reduction of a two-element array on `comm`."""
    # however rank 0 only fails on this test.
    reduction = Reduction(comm)
    values = np.array([3, 4])
    reduction.sum(values)
    assert True
class PeriodicFFTElasticHalfSpace(ElasticSubstrate):
    """ Uses the FFT to solve the displacements and stresses in an elastic
        Halfspace due to a given array of point forces. This halfspace
        implementation cheats somewhat: since a net pressure would result in
        infinite displacement, the first term of the FFT is systematically
        dropped.
        The implementation follows the description in Stanley & Kato J. Tribol.
        119(3), 481-485 (Jul 01, 1997)
    """

    name = "periodic_fft_elastic_halfspace"
    _periodic = True

    def __init__(self,
                 nb_grid_pts,
                 young,
                 physical_sizes=2 * np.pi,
                 stiffness_q0=None,
                 thickness=None,
                 poisson=0.0,
                 superclass=True,
                 fft="serial",
                 communicator=None):
        """
        Set up geometry, elastic constants, the FFT engine with its buffers
        and, if requested, the Green's function and surface stiffness.

        Parameters
        ----------
        nb_grid_pts : int or int tuple
            Number of points in the spatial directions. The length of the
            tuple determines the spatial dimension of the problem; a
            scalar is treated as a 1-tuple.
        young : float
            Young's modulus. If poisson is not specified, it is the
            contact modulus as defined in Johnson, Contact Mechanics.
        physical_sizes : float or float tuple
            (default 2π) Domain size.
            For multidimensional problems, a tuple can be provided to
            specify the lengths per dimension. If the tuple has fewer
            entries than dimensions, the last value is repeated.
        stiffness_q0 : float, optional
            Substrate stiffness at the Gamma-point (wavevector q=0).
            If None, this is taken equal to the lowest nonvanishing
            stiffness. Cannot be used in combination with thickness.
        thickness : float, optional
            Thickness of the elastic half-space. If None, this
            models an infinitely deep half-space. Cannot be used in
            combination with stiffness_q0.
        poisson : float
            (default 0) Poisson number. Need only be specified for
            substrates of finite thickness. If left unspecified for
            substrates of infinite thickness, then young is the contact
            modulus.
        superclass : bool
            (default True) If True, the Green's function and the surface
            stiffness are computed at the end of construction.
            Client software never uses this; only inheriting subclasses
            use this.
        fft : string
            (default 'serial') FFT engine to use. Options are 'fftw',
            'fftwmpi', 'pfft' and 'p3dfft'. 'serial' and 'mpi' can also be
            specified, where the choice of the appropriate fft is made by
            muFFT.
        communicator : mpi4py communicator or NuMPI stub communicator
            MPI communicator object.

        Raises
        ------
        self.Error
            If the problem is not 1- or 2-dimensional, or if both
            stiffness_q0 and thickness are specified.
        ZeroDivisionError
            If the grid spacing cannot be computed because an entry of
            nb_grid_pts is zero.
        """
        super().__init__()
        if not hasattr(nb_grid_pts, "__iter__"):
            nb_grid_pts = (nb_grid_pts, )
        if not hasattr(physical_sizes, "__iter__"):
            physical_sizes = (physical_sizes, )
        self.__dim = len(nb_grid_pts)
        if self.dim not in (1, 2):
            raise self.Error(
                ("Dimension of this problem is {}. Only 1 and 2-dimensional "
                 "problems are supported").format(self.dim))
        if stiffness_q0 is not None and thickness is not None:
            raise self.Error("Please specify either stiffness_q0 or thickness "
                             "or neither.")
        self._nb_grid_pts = nb_grid_pts
        # Pad physical_sizes to one entry per dimension by repeating its
        # last entry
        last = len(physical_sizes) - 1
        self._physical_sizes = tuple(
            physical_sizes[min(i, last)] for i in range(self.dim))

        try:
            self._steps = tuple(
                float(size) / res
                for size, res in zip(self.physical_sizes, self.nb_grid_pts))
        except ZeroDivisionError as err:
            # Re-raise with the offending parameters, keeping the original
            # exception as the explicit cause
            raise ZeroDivisionError(
                ("{}, when trying to compute the grid spacing "
                 "(physical_sizes / nb_grid_pts). "
                 "Parameters: self.physical_sizes = {}, "
                 "self.nb_grid_pts = {}").format(
                     err, self.physical_sizes, self.nb_grid_pts)) from err
        self.young = young
        self.poisson = poisson
        self.contact_modulus = young / (1 - poisson**2)
        self.stiffness_q0 = stiffness_q0
        self.thickness = thickness

        # nb_domain_grid_pts is a property; it must be read only after
        # _nb_grid_pts has been set above
        self.fftengine = FFT(self.nb_domain_grid_pts,
                             fft=fft,
                             communicator=communicator,
                             allow_temporary_buffer=False,
                             allow_destroy_input=True)
        # Allocate buffers and create plan for one degree of freedom
        self.real_buffer = self.fftengine.register_real_space_field(
            "real-space", 1)
        self.fourier_buffer = self.fftengine.register_fourier_space_field(
            "fourier-space", 1)

        self.greens_function = None
        self.surface_stiffness = None

        self._communicator = communicator
        self.pnp = Reduction(communicator)

        if superclass:
            # The surface stiffness is derived from the Green's function,
            # so the order of these two calls matters
            self.greens_function = self._compute_greens_function()
            self.surface_stiffness = self._compute_surface_stiffness()
    @property
    def dim(self, ):
        """Return the substrate's physical dimension."""
        return self.__dim

    @property
    def nb_grid_pts(self):
        """Return the number of grid points stored in `_nb_grid_pts`."""
        return self._nb_grid_pts

    @property
    def area_per_pt(self):
        return np.prod(self.physical_sizes) / np.prod(self.nb_grid_pts)

    @property
    def physical_sizes(self):
        """Return the domain sizes stored in `_physical_sizes`."""
        return self._physical_sizes

    @property
    def nb_domain_grid_pts(self, ):
        """
        Return the number of grid points of the computational domain.

        Usually, the nb_grid_pts of the system is equal to the geometric
        nb_grid_pts (of the surface), which is what this implementation
        returns. Free boundary conditions, for example, require the
        computational nb_grid_pts to differ from the geometric one, see
        FreeFFTElasticHalfSpace.
        """
        return self.nb_grid_pts

    @property
    def nb_subdomain_grid_pts(self):
        """
        Return the FFT engine's `nb_subdomain_grid_pts`.

        When working in parallel, one processor holds only part of the
        data.
        """
        return self.fftengine.nb_subdomain_grid_pts

    @property
    def topography_nb_subdomain_grid_pts(self):
        """
        Return the number of subdomain grid points for the topography; in
        this class it equals `nb_subdomain_grid_pts`.
        """
        return self.nb_subdomain_grid_pts

    @property
    def subdomain_locations(self):
        """
        Return the FFT engine's `subdomain_locations`.

        When working in parallel, one processor holds only part of the
        data.
        """
        return self.fftengine.subdomain_locations

    @property
    def topography_subdomain_locations(self):
        """
        Return the subdomain locations for the topography; in this class
        they equal `subdomain_locations`.
        """
        return self.subdomain_locations

    @property
    def subdomain_slices(self):
        """
        Return the FFT engine's `subdomain_slices`.

        When working in parallel, one processor holds only part of the
        data.
        """
        return self.fftengine.subdomain_slices

    @property
    def topography_subdomain_slices(self):
        return tuple([
            slice(s, s + n)
            for s, n in zip(self.topography_subdomain_locations,
                            self.topography_nb_subdomain_grid_pts)
        ])

    @property
    def local_topography_subdomain_slices(self):
        """
        slice representing the local subdomain without the padding area
        """
        return tuple(
            [slice(0, n) for n in self.topography_nb_subdomain_grid_pts])

    @property
    def nb_fourier_grid_pts(self):
        """
        Return the FFT engine's `nb_fourier_grid_pts`.

        When working in parallel, one processor holds only part of the
        data.
        """
        return self.fftengine.nb_fourier_grid_pts

    @property
    def fourier_locations(self):
        """
        Return the FFT engine's `fourier_locations`.

        When working in parallel, one processor holds only part of the
        data.
        """
        return self.fftengine.fourier_locations

    @property
    def fourier_slices(self):
        """
        Return the FFT engine's `fourier_slices`.

        When working in parallel, one processor holds only part of the
        data.
        """
        return self.fftengine.fourier_slices

    @property
    def communicator(self):
        """Return the MPI communicator stored in `_communicator`."""
        return self._communicator

    def __repr__(self):
        dims = 'x', 'y', 'z'
        size_str = ', '.join('{}: {}({})'.format(dim, size, nb_grid_pts)
                             for dim, size, nb_grid_pts in zip(
                                 dims, self.physical_sizes, self.nb_grid_pts))
        return "{0.dim}-dimensional halfspace '{0.name}', " \
               "physical_sizes(nb_grid_pts) in {1}, E' = {0.young}" \
            .format(self, size_str)

    def _compute_greens_function(self):
        r"""
        Compute the weights w relating fft(displacement) to fft(pressure):
        fft(u) = w*fft(p), see (6) Stanley & Kato J. Tribol. 119(3), 481-485
        (Jul 01, 1997).

        For the infinite halfspace,
        .. math ::

            w = q E^* / 2

        q is the wavevector (:math:`2 \pi / wavelength`)

        WARNING: the paper is dimensionally *incorrect*. see for the correct
        1D formulation: Section 13.2 in
            K. L. Johnson. (1985). Contact Mechanics. [Online]. Cambridge:
            Cambridge  University Press. Available from: Cambridge Books Online
            <http://dx.doi.org/10.1017/CBO9781139171731> [Accessed 16 February
            2015]
        for correct 2D formulation: Appendix 1, eq A.2 in
            Johnson, Greenwood and Higginson, "The Contact of Elastic Regular
            Wavy surfaces", Int. J. Mech. Sci. Vol. 27 No. 6, pp. 383-396, 1985
            <http://dx.doi.org/10.1016/0020-7403(85)90029-3> [Accessed 18 March
            2015]
        """
        if self.dim == 1:
            nx, = self.nb_grid_pts
            sx, = self.physical_sizes
            # Note: q-values from 0 to 1, not from 0 to 2*pi
            qx = np.arange(self.fourier_locations[0],
                           self.fourier_locations[0] +
                           self.nb_fourier_grid_pts[0],
                           dtype=np.float64)
            qx = np.where(qx <= nx // 2, qx / sx, (nx - qx) / sx)
            surface_stiffness = np.pi * self.contact_modulus * qx

            if self.stiffness_q0 is None:
                surface_stiffness[0] = surface_stiffness[1].real
            elif self.stiffness_q0 == 0.0:
                surface_stiffness[0] = 1.0
            else:
                surface_stiffness[0] = self.stiffness_q0

            greens_function = 1 / surface_stiffness
            if self.fourier_locations == (0, ):
                if self.stiffness_q0 == 0.0:
                    greens_function[0, 0] = 0.0

        elif self.dim == 2:
            if np.prod(self.nb_fourier_grid_pts) == 0:
                greens_function = np.zeros(self.nb_fourier_grid_pts,
                                           order='f',
                                           dtype=complex)
            else:
                nx, ny = self.nb_grid_pts
                sx, sy = self.physical_sizes
                # Note: q-values from 0 to 1, not from 0 to 2*pi
                qx = np.arange(self.fourier_locations[0],
                               self.fourier_locations[0] +
                               self.nb_fourier_grid_pts[0],
                               dtype=np.float64)
                qx = np.where(qx <= nx // 2, qx / sx, (nx - qx) / sx)
                qy = np.arange(self.fourier_locations[1],
                               self.fourier_locations[1] +
                               self.nb_fourier_grid_pts[1],
                               dtype=np.float64)
                qy = np.where(qy <= ny // 2, qy / sy, (ny - qy) / sy)
                q = np.sqrt((qx * qx).reshape(-1, 1) +
                            (qy * qy).reshape(1, -1))
                if self.fourier_locations == (0, 0):
                    q[0, 0] = np.NaN
                    # q[0,0] has no Impact on the end result,
                    # but q[0,0] =  0 produces runtime Warnings
                    # (because corr[0,0]=inf)
                surface_stiffness = np.pi * self.contact_modulus * q
                #                   E* / 2 (2 \pi / \lambda)
                #                   (q is 1 / lambda, here)
                if self.thickness is not None:
                    # Compute correction for finite thickness
                    q *= 2 * np.pi * self.thickness
                    fac = 3 - 4 * self.poisson
                    off = 4 * self.poisson * (2 * self.poisson - 3) + 5
                    with np.errstate(over="ignore",
                                     invalid="ignore",
                                     divide="ignore"):
                        corr = (fac * np.cosh(2 * q) + 2 * q ** 2 + off) / \
                               (fac * np.sinh(2 * q) - 2 * q)
                    # The expression easily overflows numerically. These are
                    # then q-values that are converged to the infinite system
                    # expression.
                    corr[np.isnan(corr)] = 1.0
                    surface_stiffness *= corr
                    if self.fourier_locations == (0, 0):
                        surface_stiffness[0, 0] = \
                            self.young / self.thickness * \
                            (1 - self.poisson) / ((1 - 2 * self.poisson) *
                                                  (1 + self.poisson))
                else:
                    if self.fourier_locations == (0, 0):
                        if self.stiffness_q0 is None:
                            surface_stiffness[0, 0] = \
                                (surface_stiffness[1, 0].real +
                                 surface_stiffness[0, 1].real) / 2
                        elif self.stiffness_q0 == 0.0:
                            surface_stiffness[0, 0] = 1.0
                        else:
                            surface_stiffness[0, 0] = self.stiffness_q0

                greens_function = 1 / surface_stiffness
                if self.fourier_locations == (0, 0):
                    if self.stiffness_q0 == 0.0:
                        greens_function[0, 0] = 0.0
        return greens_function

    def _compute_surface_stiffness(self):
        """
        Invert the weights w relating fft(displacement) to fft(pressure):
        """
        surface_stiffness = np.zeros(self.nb_fourier_grid_pts,
                                     order='f',
                                     dtype=complex)
        surface_stiffness[self.greens_function != 0] = \
            1. / self.greens_function[self.greens_function != 0]
        return surface_stiffness

    def evaluate_disp(self, forces):
        """ Computes the displacement due to a given force array.

        The negated forces are written to the real-space buffer,
        transformed, multiplied by the Green's function and transformed
        back.

        Keyword Arguments:
        forces   -- a numpy array containing point forces (*not* pressures)
        """
        local_shape = self.nb_subdomain_grid_pts
        if forces.shape != local_shape:
            message = ("force array has a different shape ({0}) than this "
                       "halfspace's nb_grid_pts ({1})")
            raise self.Error(message.format(forces.shape, local_shape))
        engine = self.fftengine
        real_field, fourier_field = self.real_buffer, self.fourier_buffer
        real_field.array()[...] = -forces
        engine.fft(real_field, fourier_field)
        fourier_field.array()[...] *= self.greens_function
        engine.ifft(fourier_field, real_field)
        displacement = real_field.array().real / self.area_per_pt
        return displacement * engine.normalisation

    def evaluate_force(self, disp):
        """ Computes the force (*not* pressures) due to a given displacement
        array.

        The displacements are written to the real-space buffer,
        transformed, multiplied by the surface stiffness and transformed
        back.

        Keyword Arguments:
        disp   -- a numpy array containing point displacements
        """
        local_shape = self.nb_subdomain_grid_pts
        if disp.shape != local_shape:
            message = ("displacements array has a different shape ({0}) than "
                       "this halfspace's nb_grid_pts ({1})")
            raise self.Error(message.format(disp.shape, local_shape))
        engine = self.fftengine
        real_field, fourier_field = self.real_buffer, self.fourier_buffer
        real_field.array()[...] = disp
        engine.fft(real_field, fourier_field)
        fourier_field.array()[...] *= self.surface_stiffness
        engine.ifft(fourier_field, real_field)
        scaled = -real_field.array().real * self.area_per_pt
        return scaled * engine.normalisation

    def evaluate_k_disp(self, forces):
        """ Computes the K-space displacement due to a given force array.

        Keyword Arguments:
        forces   -- a numpy array containing point forces (*not* pressures)
        """
        local_shape = self.nb_subdomain_grid_pts
        if forces.shape != local_shape:
            raise self.Error(
                ("force array has a different shape ({0}) than this halfspace'"
                 "s nb_grid_pts ({1})").format(
                     forces.shape, local_shape))  # nopep8
        self.real_buffer.array()[...] = -forces
        self.fftengine.fft(self.real_buffer, self.fourier_buffer)
        k_forces = self.fourier_buffer.array()
        return self.greens_function * k_forces / self.area_per_pt

    def evaluate_k_force(self, disp):
        """ Computes the K-space forces (*not* pressures) due to a given
        displacement array.

        Keyword Arguments:
        disp   -- a numpy array containing point displacements
        """
        local_shape = self.nb_subdomain_grid_pts
        if disp.shape != local_shape:
            message = ("displacements array has a different shape ({0}) than "
                       "this halfspace's nb_grid_pts ({1})")
            raise self.Error(message.format(disp.shape, local_shape))
        self.real_buffer.array()[...] = disp
        self.fftengine.fft(self.real_buffer, self.fourier_buffer)
        k_disp = self.fourier_buffer.array()
        return -self.surface_stiffness * k_disp * self.area_per_pt

    def evaluate_k_force_k(self, disp_k):
        """ Computes the K-space forces (*not* pressures) due to a given
        displacement array.

        Parameters:
        -----------
        disp_k: complex nd_array
            a numpy array containing the rfft of point displacements
        """

        return -self.surface_stiffness * disp_k * self.area_per_pt

    def evaluate_elastic_energy(self, forces, disp):
        """
        Compute and return the elastic energy due to forces and
        displacements: half the reduced dot product of the flattened
        displacements with the flattened negated forces.

        Arguments:
        forces -- array of forces
        disp   -- array of displacements
        """
        flat_disp = np.ravel(disp)
        flat_negated_forces = np.ravel(-forces)
        return .5 * self.pnp.dot(flat_disp, flat_negated_forces)

    def evaluate_scalar_product_k_space(self, ka, kb):
        r"""
        Computes the scalar product, i.e. the power, between `a` and `b`,
        given their Fourier representation.

        `Power theorem
        <https://ccrma.stanford.edu/~jos/mdft/Power_Theorem.html>`_:

        .. math ::

            P = \sum_{ij} a_{ij} b_{ij} =
                \frac{1}{n_x n_y}\sum_{ij}
                \tilde a_{ij} \overline{\tilde b_{ij}}

        Note that for `a`, `b` real,

        .. math :: P = \sum_{kl} Re(\tilde a_{kl}) Re(\tilde b_{kl})
        + Im(\tilde a_{kl}) Im(\tilde b_{kl})

        The local contribution is divided by the total number of domain
        grid points and then summed over all processes through the
        reduction object `self.pnp`.

        Parameters
        ----------
        ka, kb:
            arrays of complex type and of size substrate.nb_fourier_grid_pts
            Fourier representation (output of a 2D rfftn) `a` (resp. `b`)
            (`nx, ny` real array)


        Returns
        -------
        P
            The scalar product of a and b

        """

        # ka and kb are the output of the 2D rfftn, which means that a part
        # of the transform is omitted because of the symmetry along one
        # dimension.
        # NOTE(review): the comments inherited from the serial version speak
        # of the "last dimension" / "columns", but the code below applies
        # the boundary weights along axis 0 (rows) -- confirm the storage
        # convention of the FFT engine.
        #
        # That's why the components whose symmetric counterparts have been
        # omitted are weighted with a factor of 2.
        #
        # The first row (index [0, ...], wavevector 0 along the halved
        # dimension) has no symmetric counterpart.
        #
        # When the number of points in the halved dimension is even, the
        # last row (Nyquist frequency) has no symmetric counterpart either.
        #
        # The serial code implementation would look like this
        # if (self.nb_domain_grid_pts[-1] % 2 == 0)
        #   return .5*(np.vdot(ka, kb).real +
        #           # adding the data that has been omitted by rfftn
        #           np.vdot(ka[..., 1:-1], kb[..., 1:-1]).real
        #           # because of symmetry
        #           )/self.nb_pts
        # else :
        #   return .5 * (np.vdot(ka, kb).real +
        #                  # adding the data that has been omitted by rfftn
        #      #           np.vdot(ka[..., 1:], kb[..., 1:]).real
        #      #           # because of symmetry
        #      #           )/self.nb_pts
        #
        # Parallelized version
        # The inner part of the Fourier data should always be symmetrized
        # (i.e. multiplied by 2). When the Fourier subdomain contains
        # boundary values (wavevector 0 (even and odd point counts) and the
        # Nyquist wavevector n//2 (only for even point counts)) these values
        # should only be added once.

        if ka.size > 0:
            # Weight of the first local row
            if self.fourier_locations[0] == 0:
                # First row of this fourier data is first of global data
                fact0 = 1
            elif self.nb_fourier_grid_pts[0] > 1:
                # local first row is not the first in the global data
                fact0 = 2
            else:
                # Single local row that is not the global first row: it is
                # accounted for through factend below
                fact0 = 0

            # Weight of the last local row
            if self.fourier_locations[0] == 0 and \
                    self.nb_fourier_grid_pts[0] == 1:
                # Single local row that is the global first row: already
                # accounted for through fact0 above
                factend = 0
            elif (self.nb_domain_grid_pts[0] % 2 == 1):
                # odd number of points, the last row always has to be
                # symmetrized
                factend = 2
            elif self.fourier_locations[0] + \
                    self.nb_fourier_grid_pts[0] - 1 == \
                    self.nb_domain_grid_pts[0] // 2:
                # last row of the global rfftn already contains its
                # symmetric counterpart
                factend = 1
                # print("last Element of the even data has to be accounted
                # only once")
            else:
                factend = 2
                # print("last element of this local slice is not last element
                # of the total global data")
            # print("fact0={}".format(fact0))
            # print("factend={}".format(factend))

            # Weight of the rows strictly between the first and the last
            # local row (only present if there are more than two rows)
            if self.nb_fourier_grid_pts[0] > 2:
                factmiddle = 2
            else:
                factmiddle = 0

            # vdot(a, b) = conj(a) .  b
            locsum = (factmiddle * np.vdot(ka[1:-1, ...], kb[1:-1, ...]).real +
                      fact0 * np.vdot(ka[0, ...], kb[0, ...]).real + factend *
                      np.vdot(ka[-1, ...], kb[-1, ...]).real) / np.prod(
                          self.nb_domain_grid_pts)  # nopep8
            # We divide by the total number of points to get the appropriate
            # normalisation of the Fourier transform (in numpy the division
            # happens only at the inverse transform)
        else:
            # This handles the case where the processor holds an empty
            # subdomain
            locsum = np.array([], dtype=ka.real.dtype)
        # print(locsum)
        return self.pnp.sum(locsum)

    def evaluate_elastic_energy_k_space(self, kforces, kdisp):
        r"""
        Computes the energy due to forces and displacements using their
        Fourier representation.

        .. math ::

            E_{el} &= - \frac{1}{2} \sum_{ij} u_{ij} f_{ij}

                   &= - \frac{1}{2} \frac{1}{n_x n_y}
                      \sum_{kl} \tilde u_{kl} \overline{\tilde f_{kl}}

        (:math:`\tilde f_{ij} = - \tilde K_{ijkl} u`)

        In a parallelized code kforces and kdisp contain only the slice
        attributed to this processor.

        Parameters
        ----------
        kforces:
            array of complex type and of size substrate.nb_fourier_grid_pts
            Fourier representation (output of a 2D rfftn) of the forces
            acting on the grid points
        kdisp:
            array of complex type and of size substrate.nb_fourier_grid_pts
            Fourier representation (output of a 2D rfftn) of the
            displacements of the grid points

        Returns
        -------
        E
            The elastic energy due to the forces and displacements
        """
        scalar_product = self.evaluate_scalar_product_k_space(kdisp, kforces)
        return -0.5 * scalar_product

    def evaluate(self, disp, pot=True, forces=False):
        """Evaluates the elastic energy and the point forces.

        If forces are requested, they are computed in real space and the
        energy (if requested) is computed from them. If only the energy is
        requested, it is computed from the Fourier representation.

        Keyword Arguments:
        disp   -- array of displacements
        pot    -- (default True) if true, returns potential energy
        forces -- (default False) if true, returns forces

        Returns a tuple (potential, force); an entry that was not requested
        is None.
        """
        force = potential = None
        if forces:
            force = self.evaluate_force(disp)
            if pot:
                potential = self.evaluate_elastic_energy(force, disp)
        elif pot:
            kforce = self.evaluate_k_force(disp)
            # TODO: OPTIMISATION: here kdisp is computed twice, because it's
            #  needed in kforce
            # Transform disp again so that the Fourier buffer holds kdisp
            # for the energy evaluation
            self.real_buffer.array()[...] = disp
            self.fftengine.fft(self.real_buffer, self.fourier_buffer)
            potential = self.evaluate_elastic_energy_k_space(
                kforce, self.fourier_buffer.array())
        return potential, force

    def evaluate_k(self, disp_k, pot=True, forces=False):
        """Evaluates the elastic energy and the point forces
        Keyword Arguments:
        disp   -- array of distances
        pot    -- (default True) if true, returns potential energy
        forces -- (default False) if true, returns forces
        """
        potential = None
        if forces:
            force_k = self.evaluate_k_force_k(disp_k)
            if pot:
                potential = self.evaluate_elastic_energy_k_space(
                    force_k, disp_k)
        elif pot:
            force_k = self.evaluate_k_force_k(disp_k)
            potential = self.evaluate_elastic_energy_k_space(force_k, disp_k)
        return potential, force_k
 def has_undefined_data(self):
     """
     Return the ``any`` reduction over the communicator of whether the
     local heights have a mask with at least one masked entry.
     """
     reducer = Reduction(self.communicator)
     mask = np.ma.getmask(self._heights)
     locally_undefined = mask is not np.ma.nomask and mask.sum() > 0
     return reducer.any(locally_undefined)
    def __init__(self,
                 nb_grid_pts,
                 young,
                 physical_sizes=2 * np.pi,
                 stiffness_q0=None,
                 thickness=None,
                 poisson=0.0,
                 superclass=True,
                 fft="serial",
                 communicator=None):
        """
        Set up geometry, elastic constants, the FFT engine with its buffers
        and, if requested, the Green's function and surface stiffness.

        Parameters
        ----------
        nb_grid_pts : int or int tuple
            Number of points in the spatial directions. The length of the
            tuple determines the spatial dimension of the problem; a
            scalar is treated as a 1-tuple.
        young : float
            Young's modulus. If poisson is not specified, it is the
            contact modulus as defined in Johnson, Contact Mechanics.
        physical_sizes : float or float tuple
            (default 2π) Domain size.
            For multidimensional problems, a tuple can be provided to
            specify the lengths per dimension. If the tuple has fewer
            entries than dimensions, the last value is repeated.
        stiffness_q0 : float, optional
            Substrate stiffness at the Gamma-point (wavevector q=0).
            If None, this is taken equal to the lowest nonvanishing
            stiffness. Cannot be used in combination with thickness.
        thickness : float, optional
            Thickness of the elastic half-space. If None, this
            models an infinitely deep half-space. Cannot be used in
            combination with stiffness_q0.
        poisson : float
            (default 0) Poisson number. Need only be specified for
            substrates of finite thickness. If left unspecified for
            substrates of infinite thickness, then young is the contact
            modulus.
        superclass : bool
            (default True) If True, the Green's function and the surface
            stiffness are computed at the end of construction.
            Client software never uses this; only inheriting subclasses
            use this.
        fft : string
            (default 'serial') FFT engine to use. Options are 'fftw',
            'fftwmpi', 'pfft' and 'p3dfft'. 'serial' and 'mpi' can also be
            specified, where the choice of the appropriate fft is made by
            muFFT.
        communicator : mpi4py communicator or NuMPI stub communicator
            MPI communicator object.

        Raises
        ------
        self.Error
            If the problem is not 1- or 2-dimensional, or if both
            stiffness_q0 and thickness are specified.
        ZeroDivisionError
            If the grid spacing cannot be computed because an entry of
            nb_grid_pts is zero.
        """
        super().__init__()
        if not hasattr(nb_grid_pts, "__iter__"):
            nb_grid_pts = (nb_grid_pts, )
        if not hasattr(physical_sizes, "__iter__"):
            physical_sizes = (physical_sizes, )
        self.__dim = len(nb_grid_pts)
        if self.dim not in (1, 2):
            raise self.Error(
                ("Dimension of this problem is {}. Only 1 and 2-dimensional "
                 "problems are supported").format(self.dim))
        if stiffness_q0 is not None and thickness is not None:
            raise self.Error("Please specify either stiffness_q0 or thickness "
                             "or neither.")
        self._nb_grid_pts = nb_grid_pts
        # Pad physical_sizes to one entry per dimension by repeating its
        # last entry
        last = len(physical_sizes) - 1
        self._physical_sizes = tuple(
            physical_sizes[min(i, last)] for i in range(self.dim))

        try:
            self._steps = tuple(
                float(size) / res
                for size, res in zip(self.physical_sizes, self.nb_grid_pts))
        except ZeroDivisionError as err:
            # Re-raise with the offending parameters, keeping the original
            # exception as the explicit cause
            raise ZeroDivisionError(
                ("{}, when trying to compute the grid spacing "
                 "(physical_sizes / nb_grid_pts). "
                 "Parameters: self.physical_sizes = {}, "
                 "self.nb_grid_pts = {}").format(
                     err, self.physical_sizes, self.nb_grid_pts)) from err
        self.young = young
        self.poisson = poisson
        self.contact_modulus = young / (1 - poisson**2)
        self.stiffness_q0 = stiffness_q0
        self.thickness = thickness

        # nb_domain_grid_pts must be read only after _nb_grid_pts has been
        # set above
        self.fftengine = FFT(self.nb_domain_grid_pts,
                             fft=fft,
                             communicator=communicator,
                             allow_temporary_buffer=False,
                             allow_destroy_input=True)
        # Allocate buffers and create plan for one degree of freedom
        self.real_buffer = self.fftengine.register_real_space_field(
            "real-space", 1)
        self.fourier_buffer = self.fftengine.register_fourier_space_field(
            "fourier-space", 1)

        self.greens_function = None
        self.surface_stiffness = None

        self._communicator = communicator
        self.pnp = Reduction(communicator)

        if superclass:
            # The Green's function is computed first and the surface
            # stiffness second; keep this order
            self.greens_function = self._compute_greens_function()
            self.surface_stiffness = self._compute_surface_stiffness()
예제 #19
0
 def plastic_area(self):
     """
     Return the number of nonzero entries of `__h_pl`, summed over the
     communicator, multiplied by the area per grid point.
     """
     reducer = Reduction(self._communicator)
     local_count = np.count_nonzero(self.__h_pl)
     return reducer.sum(local_count) * self.area_per_pt
def LBFGS(fun,
          x,
          args=(),
          jac=None,
          x_old=None,
          maxcor=10,
          gtol=1e-5,
          ftol=2.2e-9,
          maxiter=15000,
          maxls=20,
          linesearch_options=dict(c1=1e-3, c2=0.9),
          pnp=Reduction(MPI.COMM_WORLD),
          store_iterates=None,
          printdb=donothing,
          callback=None,
          **options):
    r"""
    Limited-memory BFGS minimisation using reductions provided by `pnp`.

    The iteration stops successfully as soon as either
    ``pnp.max(|grad|) < gtol`` (i.e. the :math:`|grad|_{\infty}` criterion) or
    ``phi_old - phi <= ftol * max(1, |phi|, |phi_old|)`` holds, and
    unsuccessfully once the iteration counter exceeds `maxiter`.

    Parameters
    ----------
    fun : callable
        Objective. If `jac` is True, ``fun(x)`` must return ``(f, grad)``;
        otherwise it returns the function value only. It is called with `x`
        reshaped to ``(-1, 1)``.
    x : array
        Current iterate, in any shape; internally reshaped to ``(-1, 1)``.
        The results are returned in the original shape.
    args
        Currently unused by this function.
    jac : True, False or callable
        True: `fun` returns value and gradient. False: raises
        NotImplementedError (no numerical gradient). Anything else is called
        as ``jac(x)`` to obtain the gradient, so the default None is not
        usable as is.
    x_old : array, optional
        Previous iterate. If None, a first step is generated with
        ``steepest_descent_wolfe2`` starting from `x`. If given, `x_old` and
        `x` are treated as two successive iterates; `x_old` is copied and
        reshaped to ``(-1, 1)``, so the caller's array is not modified.
    maxcor : int
        Maximum number of stored step / gradient-difference pairs.
    gtol : float
        Tolerance on the maximum absolute gradient component.
    ftol : float
        Tolerance on the relative decrease of the objective.
    maxiter : int
        The run is reported as failed once the iteration counter exceeds
        this value.
    maxls : int
        Maximum number of line-search iterations, default 20 as in
        scipy.optimize.fmin_l_bfgs_b.
    linesearch_options : dict
        Further options for the line search. The result of the line search
        has to satisfy the strong Wolfe conditions, see Wright and Nocedal,
        'Numerical Optimization', p.34: ``c1`` is the parameter of the
        sufficient decrease condition, ``c2`` of the curvature condition.
        The default values are chosen to match the implementation in the
        Fortran subroutine L-BFGS-B 3.0 by Ciyou Zhu, Richard Byrd, and
        Jorge Nocedal (see lbfgsb.f line 2497, with gtol=c2 and ftol=c1).
    pnp
        Reduction object; its ``sum``, ``max`` and ``dot`` methods are used.
    store_iterates : optional
        If equal to ``'iterate'``, a record (x, fun, jac) of every iterate
        is appended to the ``iterates`` entry of the result. Debugging only.
    printdb : callable
        Called with debug message strings.
    callback : callable, optional
        Called as ``callback(x)`` once per iteration, before the
        convergence checks.
    options
        Additional keyword arguments are accepted and ignored.

    Returns
    -------
    result : scipy.optimize.OptimizeResult
        With entries ``success``, ``x``, ``fun``, ``jac``, ``nit``,
        ``message`` and ``iterates``.

    Raises
    ------
    NotImplementedError
        If `jac` is False.
    AssertionError
        If the newest curvature entry ``s^T y`` is not positive or the line
        search did not converge.
    """

    if callback is None:
        callback = donothing

    if jac is True:

        def fun_grad(x):
            """
            Evaluate `fun` once and return ``(f, grad)`` with the gradient
            reshaped to a column vector.
            """
            f, grad = fun(x)
            return f, grad.reshape((-1, 1))

    elif jac is False:
        raise NotImplementedError(
            "Numerical evaluation of gradient not implemented")
    else:
        # function and gradient provided separately

        def fun_grad(x):
            """
            Evaluate function and gradient consecutively (important, see
            issue #13) and return ``(f, grad)`` with the gradient reshaped
            to a column vector.
            """
            # order matters: function value first, then gradient
            f = fun(x)
            grad = jac(x).reshape((-1, 1))
            return f, grad

    # user can provide x in the shape of his convenience
    # but we will work with the shape (-1, 1)
    original_shape = x.shape

    x = x.reshape((-1, 1))

    if x_old is None:
        x_old = x.copy()
        x, grad, x_old, grad_old, phi, phi_old, derphi = \
            steepest_descent_wolfe2(x_old, fun_grad, pnp=pnp, maxiter=maxls,
                                    **linesearch_options)
    else:
        # Work on a private column-shaped copy: x_old is overwritten in place
        # below, and a differently shaped x_old would make `x - x_old`
        # broadcast to the wrong shape.
        x_old = x_old.reshape((-1, 1)).copy()
        phi_old, grad_old = fun_grad(
            x_old
        )  # phi_old is never used, except for the convergence criterion
        phi, grad = fun_grad(x)

    # full history of x is stored here if wished
    iterates = list()
    k = 1

    gamma = 1

    Slist = []  # history of the steps of x
    Ylist = []  # history of gradient differences
    R = np.zeros((0, 0))

    grad2 = pnp.sum(grad**2)

    alpha = 0  # line search step size

    def _make_result(success, message):
        """Build the OptimizeResult describing the current iterate."""
        return scipy.optimize.OptimizeResult({
            'success': success,
            'x': x.reshape(original_shape),
            'fun': phi,
            'jac': grad.reshape(original_shape),
            'nit': k,
            'message': message,
            'iterates': iterates
        })

    # Start loop
    while True:
        # Update the histories Sk, Yk; once more than maxcor pairs have been
        # seen, the oldest pair is dropped.
        if k > maxcor:
            Slist[:-1] = Slist[1:]
            Slist[-1] = (x - x_old)

            Ylist[:-1] = Ylist[1:]
            Ylist[-1] = (grad - grad_old)

        else:
            Slist.append(x - x_old)
            Ylist.append(grad - grad_old)

        # 2.
        grad2prev = grad2
        grad2 = pnp.sum(np.asarray(grad)**2)

        ######################
        # check if job is done
        callback(x)

        if (pnp.max(np.abs(grad)) < gtol):
            return _make_result(True, 'CONVERGENCE: NORM_OF_GRADIENT_<=_GTOL')

        if ((phi_old - phi) <= ftol * max((1, abs(phi), abs(phi_old)))):
            return _make_result(
                True, 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH')

        if k > maxiter:
            return _make_result(
                False, 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT')

        ###########
        # new iteration

        # Entry-wise equivalents of S^T grad and Y^T grad
        STgrad = np.array([pnp.dot(si.T, grad) for si in Slist]).reshape(-1, 1)
        YTgrad = np.array([pnp.dot(yi.T, grad) for yi in Ylist]).reshape(-1, 1)

        if k > maxcor:
            # Update S^T grad_old from quantities of the previous iteration
            # instead of recomputing all dot products.
            S_now_T_grad_prev = np.roll(STgrad_prev, -1)
            S_now_T_grad_prev[-1] = -alpha * gamma * grad2prev - alpha * (
                STgrad_prev.T.dot(p1) + gamma * YTgrad_prev.T.dot(p2))
        else:  # straightforward Version
            S_now_T_grad_prev = np.array(
                [pnp.dot(si.T, grad_old) for si in Slist]).reshape(-1, 1)

        if k > maxcor:
            R = np.roll(R, (-1, -1),
                        axis=(0, 1))  # mxm Matrix hold by all Processors
            R[-1, :] = 0
            STym1 = STgrad - S_now_T_grad_prev
            R[:, -1] = STym1.flat  # O(m x n)

        elif k == 1:
            # Upper triangle of S^T Y
            R = np.triu(
                np.array([[pnp.dot(si.T, yi).item() for yi in Ylist]
                          for si in Slist]))
        else:
            # Grow R by one zero row and one column S^T y_{k-1}
            R = np.vstack([R, np.zeros(k - 1)])
            R = np.hstack([
                R,
                np.array([pnp.dot(si.T, Ylist[-1])
                          for si in Slist]).reshape(k, 1)
            ])
        if k > maxcor:
            D = np.roll(D, (-1, -1), axis=(0, 1))
            D[-1, -1] = R[-1, -1]
        else:
            D = np.diag(R.diagonal())
        assert D[-1, -1] > 0, "k = {}: ".format(k)  # Assumption of Theorem 2.2

        if k > maxcor:
            YTY = np.roll(YTY, (-1, -1), axis=(0, 1))
            YTY[-1, :-1] = YTY[:-1, -1] = (YTgrad[:-1] - YTgrad_prev[1:]).flat
            YTY[-1, -1] = grad2prev - grad2 + 2 * YTgrad[-1]
        else:
            # Entry-wise equivalent of Y^T Y
            YTY = np.array([[pnp.dot(yi1.T, yi2).item() for yi2 in Ylist]
                            for yi1 in Ylist])
        # Step 5.
        gamma = D[-1, -1] / YTY[-1,
                                -1]  # n.b. D[-1,-1] = sk-1T yk-1 = yk-1T sk-1

        # Step 6. and 7. together
        Rinv = np.linalg.inv(R)
        RiSg = Rinv.dot(STgrad)
        p1 = Rinv.T.dot(D + gamma * YTY).dot(RiSg) - gamma * Rinv.T.dot(YTgrad)
        p2 = -RiSg  # TODO

        # Equivalent to gamma * grad + S.dot(p1) + gamma * Y.dot(p2)
        Hgrad = gamma * grad
        for si, yi, p1i, p2i in zip(Slist, Ylist, p1.flat, p2.flat):
            Hgrad += si * p1i.item() + gamma * yi * p2i.item()

        phi_old = float(phi)
        grad_old[:] = grad
        x_old[:] = x

        def _phi_phiprime(alpha):
            """
            Objective and directional derivative along ``-Hgrad`` at step
            `alpha`; the gradient is written into `grad` in place.
            """
            phi, grad[...] = fun_grad(x - Hgrad * alpha)
            phiprime = pnp.dot(grad.T, -Hgrad).item()
            return phi, phiprime

        # TODO: oldphi0: is it allowed to stay outside of the search direction ?
        alpha, phi, phi0, derphi = scalar_search_wolfe2(_phi_phiprime,
                                                        phi0=phi,
                                                        derphi0=pnp.dot(
                                                            grad.T,
                                                            -Hgrad).item(),
                                                        maxiter=maxls,
                                                        **linesearch_options)

        printdb("derphi: {}".format(derphi))
        assert derphi is not None, "line-search did not converge"

        x = x - Hgrad * alpha

        if store_iterates == 'iterate':
            iterate = scipy.optimize.OptimizeResult({
                'x':
                x.copy().reshape(original_shape),
                'fun':
                phi,
                # `grad` is overwritten in place by later line searches, so
                # the stored gradient must be a copy, not a view.
                'jac':
                grad.copy().reshape(original_shape)
            })
            iterates.append(iterate)

        printdb("k = {}".format(k))
        k = k + 1

        STgrad_prev = STgrad.copy()
        YTgrad_prev = YTgrad.copy()