def gradfun(der_loc_vals, der_logs_ave):
    # Note: lloc_r, n_samples_node and self._loss_stats are taken from the
    # enclosing scope; only the derivatives enter as arguments.
    par_dims = der_loc_vals.ndim - 1

    _lloc_r = lloc_r.reshape(
        (n_samples_node,) + tuple(1 for i in range(par_dims))
    )

    grad = _mean(der_loc_vals.conjugate() * _lloc_r, axis=0) - (
        der_logs_ave.conjugate() * self._loss_stats.mean
    )
    return grad
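# A minimal, self-contained sketch (plain NumPy, synthetic data; the names below
# are illustrative and not part of the code above) of the estimator implemented
# by gradfun: grad_k = <O_k* E_loc> - <O_k>* <E_loc>, with O_k = ∂/∂x_k log Ψ.
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_par = 100, 4
der_logs = rng.normal(size=(n_samples, n_par)) + 1j * rng.normal(size=(n_samples, n_par))
e_loc = rng.normal(size=n_samples) + 1j * rng.normal(size=n_samples)

grad = np.mean(der_logs.conjugate() * e_loc[:, None], axis=0) - (
    np.mean(der_logs, axis=0).conjugate() * np.mean(e_loc)
)

# Equivalent covariance form: averaging O* (E_loc - <E_loc>) gives the same result.
grad_centered = np.mean(der_logs.conjugate() * (e_loc - e_loc.mean())[:, None], axis=0)
assert np.allclose(grad, grad_centered)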
def stats(self):
    stats = {}

    accept = self._accepted_samples / float(self._total_samples)
    stats["mean_acceptance"] = _mean(accept)
    stats["min_acceptance"] = _mean(accept.min())
    stats["max_acceptance"] = _mean(accept.max())

    # Average position of beta=1
    # This is normalized and centered around zero
    # In the ideal case the average should be zero
    stats["normalized_beta=1_position"] = (
        self._beta_stats[1] / float(self._n_replicas - 1) - 0.5
    )

    # Average variance on the position of beta=1
    # In the ideal case this quantity should be of order ~ [0.2, 1]
    stats["normalized_beta=1_diffusion"] = _np.sqrt(
        self._beta_stats[2] / self._beta_stats[-1]
    ) / float(self._n_replicas)

    return stats
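# Hedged toy (plain NumPy, hypothetical numbers) for the diagnostics above. The
# acceptance ratios are computed exactly as in stats(); the beta=1 position is
# illustrated only in spirit: a mean replica index, rescaled to [0, 1] and
# centered, sits near zero when the beta=1 replica diffuses freely.
import numpy as np

n_replicas = 8
accepted = np.array([480.0, 510.0, 495.0, 450.0])  # hypothetical accepted moves per replica
total = 1000.0
accept = accepted / total
print(accept.mean(), accept.min(), accept.max())

avg_position = 3.4                                  # hypothetical mean index of beta=1
print(avg_position / float(n_replicas - 1) - 0.5)   # ~0 for a well-mixed ladder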
def estimate(op):
    lvs = _local_values(op, psi, samples)
    stats = _statistics(lvs.T)

    if compute_gradients:
        samples_r = samples.reshape((-1, samples.shape[-1]))
        eloc_r = (lvs - _mean(lvs)).reshape(-1, 1)

        grad = sampler.machine.vector_jacobian_prod(
            samples_r,
            eloc_r / n_samples,
        )
        return stats, grad
    else:
        return stats
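# Sketch of what the vector_jacobian_prod call above amounts to, assuming (as the
# estimator in gradfun suggests) that the VJP contracts the conjugated Jacobian
# J[j, k] = ∂/∂x_k log Ψ(v_j) with the given vector. Plain NumPy, synthetic data;
# all names here are illustrative.
import numpy as np

rng = np.random.default_rng(2)
n_samples, n_par = 64, 3
jac = rng.normal(size=(n_samples, n_par)) + 1j * rng.normal(size=(n_samples, n_par))
lvs = rng.normal(size=n_samples) + 1j * rng.normal(size=n_samples)

eloc_r = (lvs - lvs.mean()).reshape(-1, 1)
grad_vjp = jac.conj().T @ (eloc_r / n_samples)

# Same quantity written as the covariance-form average <O_k* (E_loc - <E_loc>)>.
grad_direct = np.mean(jac.conj() * eloc_r, axis=0).reshape(-1, 1)
assert np.allclose(grad_vjp, grad_direct)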
def _forward_and_backward(self, sample=True):
    """
    Performs a single forward-and-backward pass: optionally re-samples the
    machine, estimates the energy and its gradient, and computes the
    parameter update.

    Args:
        sample (bool): Whether to generate a fresh batch of samples before
            computing the update.

    Returns:
        The parameter update dp.
    """
    if sample:
        self._sampler.reset()

        # Burn-in phase
        self._sampler.generate_samples(self._n_discard)

        # Generate samples and store them
        self._samples = self._sampler.generate_samples(
            self._n_samples_node, samples=self._samples
        )

    # Compute the local energy estimator and the average energy
    eloc, self._loss_stats = self._get_mc_stats(self._ham)

    # Center the local energy
    eloc -= _mean(eloc)

    samples_r = self._samples.reshape((-1, self._samples.shape[-1]))
    eloc_r = eloc.reshape(-1, 1)

    # Perform update
    if self._sr:
        if self._sr.onthefly:
            self._grads = self._machine.vector_jacobian_prod(
                samples_r, eloc_r / self._n_samples, self._grads
            )

            self._grads = tree_map(_sum_inplace, self._grads)

            self._dp = self._sr.compute_update_onthefly(
                samples_r, self._grads, self._dp
            )
        else:
            # When using SR (natural gradient) we need the full Jacobian
            self._grads, self._jac = self._machine.vector_jacobian_prod(
                samples_r,
                eloc_r / self._n_samples,
                self._grads,
                return_jacobian=True,
            )

            self._grads = tree_map(_sum_inplace, self._grads)

            self._dp = self._sr.compute_update(self._jac, self._grads, self._dp)
    else:
        # Computing updates using the simple gradient
        self._grads = self._machine.vector_jacobian_prod(
            samples_r, eloc_r / self._n_samples, self._grads
        )

        self._grads = tree_map(_sum_inplace, self._grads)

        # If the parameters are real but the gradient is complex, keep only
        # the real part. This is not necessary for SR, because SR already
        # does it.
        if not self._machine.has_complex_parameters:
            self._dp = tree_map(lambda x: x.real, self._grads)
        else:
            self._dp = self._grads

    return self._dp
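# Illustrative single-process stand-in (plain NumPy, real-valued for simplicity)
# for the reduction pattern above: each node computes a partial VJP over its own
# samples weighted by eloc / n_samples, and the _sum_inplace step adds the
# per-node results, yielding the average over all samples across nodes.
import numpy as np

rng = np.random.default_rng(3)
n_total = 80
jacs = np.split(rng.normal(size=(n_total, 2)), 2)    # Jacobian blocks on two "nodes"
elocs = np.split(rng.normal(size=(n_total, 1)), 2)   # centered local energies per node

partial = [j.T @ (e / n_total) for j, e in zip(jacs, elocs)]  # per-node VJPs
grad = sum(partial)                                           # analogue of _sum_inplace

assert np.allclose(grad, np.concatenate(jacs).T @ (np.concatenate(elocs) / n_total))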
def compute_update(self, oks, grad, out=None):
    r"""
    Solves the SR flow equation for the parameter update ẋ.

    The SR update is computed by solving the linear equation

        Sẋ = f

    where S is the covariance matrix of the partial derivatives
    O_i(v_j) = ∂/∂x_i log Ψ(v_j) and f is a generalized force (the loss
    gradient).

    Args:
        oks: The matrix of log-derivatives, O_i(v_j).
        grad: The vector of forces f.
        out: Output array for the update ẋ.
    """
    oks -= _mean(oks, axis=0)

    if self.has_complex_parameters is None:
        raise ValueError(
            "has_complex_parameters not set: this SR object is not properly initialized."
        )

    n_samp = _sum_inplace(_np.atleast_1d(oks.shape[0]))

    n_par = grad.shape[0]

    if out is None:
        out = _np.zeros(n_par, dtype=_np.complex128)

    if self._has_complex_parameters:
        if self._use_iterative:
            op = self._linear_operator(oks, n_samp)

            if self._x0 is None:
                self._x0 = _np.zeros(n_par, dtype=_np.complex128)

            out[:], info = self._sparse_solver(
                op,
                grad,
                x0=self._x0,
                tol=self.sparse_tol,
                maxiter=self.sparse_maxiter,
            )
            if info < 0:
                raise RuntimeError("SR sparse solver did not converge.")

            self._x0 = out
        else:
            self._S = _np.matmul(oks.conj().T, oks, self._S)
            self._S = _sum_inplace(self._S)
            self._S /= float(n_samp)

            self._apply_preconditioning(grad)

            if self._lsq_solver == "Cholesky":
                c, low = _cho_factor(self._S, check_finite=False)
                out[:] = _cho_solve((c, low), grad)
            else:
                out[:], residuals, self._last_rank, s_vals = _lstsq(
                    self._S,
                    grad,
                    cond=self._svd_threshold,
                    lapack_driver=self._lapack_driver,
                )

            self._revert_preconditioning(out)
    else:
        if self._use_iterative:
            op = self._linear_operator(oks, n_samp)

            if self._x0 is None:
                self._x0 = _np.zeros(n_par)

            out[:].real, info = self._sparse_solver(
                op,
                grad.real,
                x0=self._x0,
                tol=self.sparse_tol,
                maxiter=self.sparse_maxiter,
            )
            if info < 0:
                raise RuntimeError("SR sparse solver did not converge.")

            self._x0 = out.real
        else:
            self._S = _np.matmul(oks.conj().T, oks, self._S)
            # Accumulate S across nodes, mirroring the complex-parameter branch,
            # since n_samp is already the node-summed sample count.
            self._S = _sum_inplace(self._S)
            self._S /= float(n_samp)

            self._apply_preconditioning(grad)

            if self._lsq_solver == "Cholesky":
                c, low = _cho_factor(self._S, check_finite=False)
                out[:].real = _cho_solve((c, low), grad)
            else:
                out[:].real, residuals, self._last_rank, s_vals = _lstsq(
                    self._S.real,
                    grad.real,
                    cond=self._svd_threshold,
                    lapack_driver=self._lapack_driver,
                )

            self._revert_preconditioning(out.real)

        out.imag.fill(0.0)

    if _n_nodes > 1:
        self._comm.bcast(out, root=0)
        self._comm.barrier()

    return out
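# Stand-alone sketch (plain NumPy/SciPy, synthetic data) of the dense branch of
# compute_update: build S = O†O / n_samp from centered log-derivatives and solve
# S x = f with a least-squares solver. The Cholesky route and the preconditioning
# steps of the class are omitted here.
import numpy as np
from scipy.linalg import lstsq

rng = np.random.default_rng(4)
n_samp, n_par = 200, 5
oks = rng.normal(size=(n_samp, n_par)) + 1j * rng.normal(size=(n_samp, n_par))
grad = rng.normal(size=n_par) + 1j * rng.normal(size=n_par)

oks = oks - oks.mean(axis=0)        # center the log-derivatives
S = oks.conj().T @ oks / n_samp     # covariance matrix S

x, residuals, rank, s_vals = lstsq(S, grad, cond=1e-10)
# x approximates the SR update: S @ x ≈ grad up to the SVD cutoff.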
def _forward_and_backward(self):
    """
    Performs a single iteration step of the Qsr calculation: the gradient
    is estimated from the negative (model-driven) and positive (data-driven)
    phases, and the corresponding parameter update is returned.
    """
    # Generate samples from the model
    self._sampler.reset()

    # Burn-in phase
    for _ in self._sampler.samples(self._n_discard):
        pass

    # Generate samples and store them
    for i, sample in enumerate(self._sampler.samples(self._n_samples_node)):
        self._samples[i] = sample

    # Randomly select a batch of training data
    self._rand_ind = self._get_rand_ind(
        self._n_samples_data_node, self._n_training_samples
    )

    self._data_samples = self._t_samples[self._rand_ind]
    self._data_bases = self._bases[self._rand_ind]

    # Perform update
    if self._sr:
        # When using SR (natural gradient) we need the full Jacobian,
        # so compute the log-derivatives explicitly
        for i, sample in enumerate(self._samples):
            self._der_logs[i] = self._machine.der_log(sample, out=self._der_logs[i])

        # Negative phase driven by the model
        grad_neg = _mean(
            self._der_logs.reshape(-1, self._npar), axis=0
        ).conjugate()

        # Positive phase driven by the data
        for x, b_x, grad_x in zip(
            self._data_samples, self._data_bases, self._data_grads
        ):
            self._compute_rotated_grad(x, b_x, grad_x)

        grad_pos = _mean(self._data_grads, axis=0)

        grad = 2.0 * (grad_neg - grad_pos)

        dp = _np.empty(self._npar, dtype=_np.complex128)
        self._sr.compute_update(self._der_logs.reshape(-1, self._npar), grad, dp)
    else:
        # Computing updates using the simple gradient

        # Negative phase driven by the model
        vec_ones = _np.ones(self._batch_size, dtype=_np.complex128) / float(
            self._batch_size
        )
        for x, grad_x in zip(self._samples, self._grads):
            self._machine.vector_jacobian_prod(x, vec_ones, grad_x)

        grad_neg = _mean(self._grads, axis=0)

        # Positive phase driven by the data
        for x, b_x, grad_x in zip(
            self._data_samples, self._data_bases, self._data_grads
        ):
            self._compute_rotated_grad(x, b_x, grad_x)

        grad_pos = _mean(self._data_grads, axis=0)

        dp = 2.0 * (grad_neg - grad_pos)

    return dp
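# Toy illustration (plain NumPy, synthetic log-derivatives; all names are
# illustrative) of the two-phase gradient assembled above: the negative phase
# averages conjugated log-derivatives over model samples, the positive phase
# averages the rotated data gradients, and the update direction is 2*(neg - pos).
import numpy as np

rng = np.random.default_rng(5)
n_model, n_data, n_par = 50, 30, 4
der_logs_model = rng.normal(size=(n_model, n_par)) + 1j * rng.normal(size=(n_model, n_par))
data_grads = rng.normal(size=(n_data, n_par)) + 1j * rng.normal(size=(n_data, n_par))

grad_neg = np.mean(der_logs_model, axis=0).conjugate()
grad_pos = np.mean(data_grads, axis=0)
grad = 2.0 * (grad_neg - grad_pos)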