Example #1
        def gradfun(der_loc_vals, der_logs_ave):
            par_dims = der_loc_vals.ndim - 1

            _lloc_r = lloc_r.reshape((n_samples_node, ) +
                                     tuple(1 for i in range(par_dims)))

            grad = _mean(der_loc_vals.conjugate() * _lloc_r, axis=0) - (
                der_logs_ave.conjugate() * self._loss_stats.mean)
            return grad
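For reference, here is a minimal, self-contained numpy sketch of the estimator that gradfun computes; the array shapes and random inputs are hypothetical stand-ins for der_loc_vals, lloc_r and der_logs_ave.

import numpy as np

rng = np.random.default_rng(0)
n_samples, n_par = 1000, 4

# Hypothetical stand-ins for the per-sample derivative values and local loss values.
der_loc_vals = rng.normal(size=(n_samples, n_par)) + 1j * rng.normal(size=(n_samples, n_par))
lloc = rng.normal(size=n_samples) + 1j * rng.normal(size=n_samples)

# Same estimator as gradfun: <D_k^* L> - <D_k>^* <L>
grad = (der_loc_vals.conj() * lloc[:, None]).mean(axis=0) \
    - der_loc_vals.mean(axis=0).conj() * lloc.mean()

# Equivalent covariance form, obtained by centering the local values first.
grad_centered = (der_loc_vals.conj() * (lloc - lloc.mean())[:, None]).mean(axis=0)
assert np.allclose(grad, grad_centered)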
Example #2
    def stats(self):
        stats = {}
        accept = self._accepted_samples / float(self._total_samples)

        stats["mean_acceptance"] = _mean(accept)
        stats["min_acceptance"] = _mean(accept.min())
        stats["max_acceptance"] = _mean(accept.max())

        # Average position of beta=1
        # This is normalized and centered around zero
        # In the ideal case the average should be zero
        stats["normalized_beta=1_position"] = (
            self._beta_stats[1] / float(self._n_replicas - 1) - 0.5)

        # Average variance on the position of beta=1
        # In the ideal case this quantity should be of order ~ [0.2, 1]
        stats["normalized_beta=1_diffusion"] = _np.sqrt(
            self._beta_stats[2] / self._beta_stats[-1]) / float(
                self._n_replicas)

        return stats
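A small, self-contained sketch of how these diagnostics could be inspected outside the sampler; the per-chain counters and the beta=1 statistics below are hypothetical placeholders for the sampler's internal state.

import numpy as np

# Hypothetical per-chain acceptance counters for a 32-replica sampler.
accepted_samples = np.array([480, 510, 495, 530])
total_samples = 1000
n_replicas = 32
beta1_mean, beta1_var, beta1_norm = 15.2, 45.0, 1.0  # stand-ins for self._beta_stats

accept = accepted_samples / float(total_samples)
print("mean acceptance:", accept.mean())
# Ideally close to zero: the beta=1 replica spends equal time above and below the middle.
print("normalized beta=1 position:", beta1_mean / float(n_replicas - 1) - 0.5)
# Ideally of order ~[0.2, 1].
print("normalized beta=1 diffusion:", np.sqrt(beta1_var / beta1_norm) / float(n_replicas))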
Example #3
    def estimate(op):
        lvs = _local_values(op, psi, samples)
        stats = _statistics(lvs.T)

        if compute_gradients:
            samples_r = samples.reshape((-1, samples.shape[-1]))
            eloc_r = (lvs - _mean(lvs)).reshape(-1, 1)
            grad = sampler.machine.vector_jacobian_prod(
                samples_r,
                eloc_r / n_samples,
            )
            return stats, grad
        else:
            return stats
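The same pattern written out with plain numpy: the statistics come from the local values, and the gradient is the Jacobian of log psi contracted with the centered local values. The Jacobian, the local values, the naive error estimate, and the conjugation convention below are assumptions for illustration, not the library's exact implementation.

import numpy as np

rng = np.random.default_rng(2)
n_samples, n_par = 512, 6

# Hypothetical Jacobian J[s, k] = d log psi(s) / d p_k and local values of the operator.
jac = rng.normal(size=(n_samples, n_par)) + 1j * rng.normal(size=(n_samples, n_par))
lvs = rng.normal(size=n_samples) + 1j * rng.normal(size=n_samples)

# Statistics of the estimator (mean and naive error of the mean).
mean = lvs.mean()
error_of_mean = lvs.std() / np.sqrt(n_samples)

# Gradient as a vector-Jacobian product with the centered local values,
# mirroring eloc_r / n_samples in estimate().
grad = jac.conj().T @ ((lvs - mean) / n_samples)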
Example #4
    def _forward_and_backward(self, sample=True):
        """
        Performs a number of VMC optimization steps.

        Args:
            n_steps (int): Number of steps to perform.
        """

        if sample:
            self._sampler.reset()

            # Burn-in phase
            self._sampler.generate_samples(self._n_discard)

            # Generate samples and store them
            self._samples = self._sampler.generate_samples(
                self._n_samples_node, samples=self._samples)

        # Compute the local energy estimator and average Energy
        eloc, self._loss_stats = self._get_mc_stats(self._ham)

        # Center the local energy
        eloc -= _mean(eloc)

        samples_r = self._samples.reshape((-1, self._samples.shape[-1]))
        eloc_r = eloc.reshape(-1, 1)

        # Perform update
        if self._sr:
            if self._sr.onthefly:

                self._grads = self._machine.vector_jacobian_prod(
                    samples_r, eloc_r / self._n_samples, self._grads)

                self._grads = tree_map(_sum_inplace, self._grads)

                self._dp = self._sr.compute_update_onthefly(
                    samples_r, self._grads, self._dp)

            else:
                # When using SR (natural gradient) we need the full Jacobian
                self._grads, self._jac = self._machine.vector_jacobian_prod(
                    samples_r,
                    eloc_r / self._n_samples,
                    self._grads,
                    return_jacobian=True,
                )

                self._grads = tree_map(_sum_inplace, self._grads)

                self._dp = self._sr.compute_update(self._jac, self._grads,
                                                   self._dp)

        else:
            # Computing updates using the simple gradient
            self._grads = self._machine.vector_jacobian_prod(
                samples_r, eloc_r / self._n_samples, self._grads)

            self._grads = tree_map(_sum_inplace, self._grads)

            # If the parameters are real but the gradient is complex, keep
            # only the real part; this is not needed for SR, which already
            # does it internally.
            if not self._machine.has_complex_parameters:
                self._dp = tree_map(lambda x: x.real, self._grads)
            else:
                self._dp = self._grads

        return self._dp
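To make the two branches explicit: with the plain gradient the update is the force itself (real part only for real parameters), while with SR the force is preconditioned by the covariance matrix of the log-derivatives. A schematic numpy version, with hypothetical arrays and a small diagonal shift added as an assumption for numerical stability:

import numpy as np

rng = np.random.default_rng(3)
n_samples, n_par = 256, 5

# Hypothetical centered log-derivatives O and centered local energies.
oks = rng.normal(size=(n_samples, n_par)) + 1j * rng.normal(size=(n_samples, n_par))
oks -= oks.mean(axis=0)
eloc = rng.normal(size=n_samples)
eloc -= eloc.mean()

# Force (loss gradient), as in vector_jacobian_prod(samples_r, eloc_r / n_samples).
force = oks.conj().T @ (eloc / n_samples)

# Plain-gradient update for real parameters: keep the real part only.
dp_plain = force.real

# SR / natural-gradient update: solve S dp = force with S = O^dagger O / N.
S = oks.conj().T @ oks / n_samples
S += 1e-6 * np.eye(n_par)  # assumed regularization, not taken from the class above
dp_sr = np.linalg.solve(S, force)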
Example #5
    def compute_update(self, oks, grad, out=None):
        r"""
        Solves the SR flow equation for the parameter update ẋ.

        The SR update is computed by solving the linear equation
           Sẋ = f
        where S is the covariance matrix of the partial derivatives
        O_i(v_j) = ∂/∂x_i log Ψ(v_j) and f is a generalized force (the loss
        gradient).

        Args:
            oks: The matrix of log-derivatives,
                O_i(v_j)
            grad: The vector of forces f.
            out: Output array for the update ẋ.
        """

        oks -= _mean(oks, axis=0)

        if self.has_complex_parameters is None:
            raise ValueError(
                "has_complex_parameters not set: this SR object is not properly initialized."
            )

        n_samp = _sum_inplace(_np.atleast_1d(oks.shape[0]))

        n_par = grad.shape[0]

        if out is None:
            out = _np.zeros(n_par, dtype=_np.complex128)

        if self._has_complex_parameters:
            if self._use_iterative:
                op = self._linear_operator(oks, n_samp)

                if self._x0 is None:
                    self._x0 = _np.zeros(n_par, dtype=_np.complex128)

                out[:], info = self._sparse_solver(
                    op,
                    grad,
                    x0=self._x0,
                    tol=self.sparse_tol,
                    maxiter=self.sparse_maxiter,
                )
                if info < 0:
                    raise RuntimeError("SR sparse solver did not converge.")

                self._x0 = out
            else:
                self._S = _np.matmul(oks.conj().T, oks, self._S)
                self._S = _sum_inplace(self._S)
                self._S /= float(n_samp)

                self._apply_preconditioning(grad)

                if self._lsq_solver == "Cholesky":
                    c, low = _cho_factor(self._S, check_finite=False)
                    out[:] = _cho_solve((c, low), grad)

                else:
                    out[:], residuals, self._last_rank, s_vals = _lstsq(
                        self._S,
                        grad,
                        cond=self._svd_threshold,
                        lapack_driver=self._lapack_driver,
                    )

                self._revert_preconditioning(out)

        else:
            if self._use_iterative:
                op = self._linear_operator(oks, n_samp)

                if self._x0 is None:
                    self._x0 = _np.zeros(n_par)

                out[:].real, info = self._sparse_solver(
                    op,
                    grad.real,
                    x0=self._x0,
                    tol=self.sparse_tol,
                    maxiter=self.sparse_maxiter,
                )
                if info < 0:
                    raise RuntimeError("SR sparse solver did not converge.")
                self._x0 = out.real
            else:
                self._S = _np.matmul(oks.conj().T, oks, self._S)
                self._S /= float(n_samp)

                self._apply_preconditioning(grad)

                if self._lsq_solver == "Cholesky":
                    c, low = _cho_factor(self._S, check_finite=False)
                    out[:].real = _cho_solve((c, low), grad)
                else:
                    out[:].real, residuals, self._last_rank, s_vals = _lstsq(
                        self._S.real,
                        grad.real,
                        cond=self._svd_threshold,
                        lapack_driver=self._lapack_driver,
                    )

                self._revert_preconditioning(out.real)

            out.imag.fill(0.0)

        if _n_nodes > 1:
            self._comm.bcast(out, root=0)
            self._comm.barrier()

        return out
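For the iterative branch, the linear operator only needs to apply S·x = Oᴴ(O x)/N, so S never has to be built explicitly. A sketch with scipy for the real-parameter case; the diagonal shift is an assumption, and the class's actual _linear_operator and _sparse_solver may differ:

import numpy as np
from scipy.sparse.linalg import LinearOperator, cg

rng = np.random.default_rng(4)
n_samples, n_par = 512, 8

# Hypothetical centered log-derivatives (real-parameter case) and force vector.
oks = rng.normal(size=(n_samples, n_par))
oks -= oks.mean(axis=0)
grad = rng.normal(size=n_par)

shift = 1e-4  # assumed regularization, not taken from the class above

def matvec(x):
    # Applies S x = O^T (O x) / n_samples without forming S.
    return oks.T @ (oks @ x) / n_samples + shift * x

op = LinearOperator((n_par, n_par), matvec=matvec, dtype=np.float64)
out, info = cg(op, grad)
if info < 0:
    raise RuntimeError("SR sparse solver did not converge.")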
Example #6
    def _forward_and_backward(self):
        """
        Perform one or several iteration steps of the Qsr calculation. In each step,
        the gradient will be estimated via negative and positive phase and subsequently,
        the variational parameters will be updated according to the configured method.

        Args:
            n_steps (int): Number of steps to perform.
        """

        # Generate samples from the model
        self._sampler.reset()

        # Burn-in phase
        for _ in self._sampler.samples(self._n_discard):
            pass

        # Generate samples and store them
        for i, sample in enumerate(self._sampler.samples(self._n_samples_node)):
            self._samples[i] = sample

        # Randomly select a batch of training data
        self._rand_ind = self._get_rand_ind(
            self._n_samples_data_node, self._n_training_samples
        )

        self._data_samples = self._t_samples[self._rand_ind]
        self._data_bases = self._bases[self._rand_ind]

        # Perform update
        if self._sr:
            # When using SR (natural gradient) we need the full Jacobian,
            # so compute the log-derivatives for every sample.
            for i, sample in enumerate(self._samples):
                self._der_logs[i] = self._machine.der_log(sample, out=self._der_logs[i])

            grad_neg = _mean(self._der_logs.reshape(-1, self._npar), axis=0).conjugate()

            # Positive phase driven by the data
            for x, b_x, grad_x in zip(
                self._data_samples, self._data_bases, self._data_grads
            ):
                self._compute_rotated_grad(x, b_x, grad_x)

            grad_pos = _mean(self._data_grads, axis=0)

            grad = 2.0 * (grad_neg - grad_pos)

            dp = _np.empty(self._npar, dtype=_np.complex128)

            self._sr.compute_update(self._der_logs.reshape(-1, self._npar), grad, dp)
        else:
            # Computing updates using the simple gradient

            # Negative phase driven by the model
            vec_ones = _np.ones(self._batch_size, dtype=_np.complex128) / float(
                self._batch_size
            )
            for x, grad_x in zip(self._samples, self._grads):
                self._machine.vector_jacobian_prod(x, vec_ones, grad_x)

            grad_neg = _mean(self._grads, axis=0)

            # Positive phase driven by the data
            for x, b_x, grad_x in zip(
                self._data_samples, self._data_bases, self._data_grads
            ):
                self._compute_rotated_grad(x, b_x, grad_x)

            grad_pos = _mean(self._data_grads, axis=0)

            dp = 2.0 * (grad_neg - grad_pos)

        return dp
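The two-phase structure in isolation: the negative phase averages the log-derivative gradients over samples drawn from the model, the positive phase averages the rotated gradients over the training data, and the update combines them. The arrays below are hypothetical placeholders for the quantities computed above, and the conjugation convention is not reproduced here.

import numpy as np

rng = np.random.default_rng(5)
n_model, n_data, n_par = 200, 100, 8

# Hypothetical per-sample gradients for model samples (negative phase)
# and rotated gradients for the training data (positive phase).
model_grads = rng.normal(size=(n_model, n_par)) + 1j * rng.normal(size=(n_model, n_par))
data_grads = rng.normal(size=(n_data, n_par)) + 1j * rng.normal(size=(n_data, n_par))

grad_neg = model_grads.mean(axis=0)
grad_pos = data_grads.mean(axis=0)

# Same combination as in the example above.
dp = 2.0 * (grad_neg - grad_pos)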