def logtheta_1d_phaseI(v, q, d):
    """Log of the 1d Riemann-Theta (mode=1) or of its d-th derivative.

    Args:
        v: argument passed (divided by 2*pi*i) to ``RiemannTheta.log_eval``.
        q: matrix whose ``[0, 0]`` entry is the 1d quadratic parameter.
            NOTE: it is modified in place when the cutoff below triggers.
        d (int): number of derivatives to take; ``d <= 0`` means none.

    Returns:
        Result of ``RiemannTheta.log_eval``; for ``d > 0`` shifted by
        ``-d * log(2*pi*i)``, otherwise shifted by the term coming from
        reducing ``v`` modulo ``q``.
    """
    two_pi_i = 2.0j * np.pi

    # Cutoff if q is not positive definite
    if np.real(q[0, 0]) <= 0 or np.isnan(q).any():
        print("NaN detected or negative value in phase I: ", q)
        q[0, 0] = np.abs(q[0, 0])

    if d > 0:
        directions = np.ones((1, d))
        return -d * np.log(two_pi_i) + RiemannTheta.log_eval(
            v / two_pi_i,
            -q / two_pi_i,
            mode=1,
            epsilon=RTBM_precision,
            derivs=directions)

    # Make NaN safe via moving to fundamental box
    quotient, remainder = np.divmod(v, q)
    quotient = np.asarray(quotient)
    shift = (quotient * v - 0.5 * q[0, 0] * quotient**2)[:, 0]
    return shift + RiemannTheta.log_eval(
        remainder / two_pi_i,
        -q / two_pi_i,
        mode=1,
        epsilon=RTBM_precision)
def rtbm_log_probability(v, bv, bh, t, w, q, mode=1):
    """Evaluate the logarithm of the RTBM probability.

    The result is the sum of the log normalisation
    ``log(sqrt(det(t) / (2*pi)**v.shape[0]))``, the quadratic/linear
    exponent in ``v`` and the difference of two ``RiemannTheta.log_eval``
    calls.

    Args:
        v: visible data; ``v.shape[0]`` enters the normalisation and
            ``v.shape[1]`` sizes the broadcast of the constant term
            (presumably one sample per column -- confirm with callers).
        bv, bh: visible / hidden bias terms.
        t, w, q: model matrices (``t`` is inverted and its determinant taken).
        mode: forwarded positionally to ``RiemannTheta.log_eval``.

    Returns:
        The log-probability values.
    """
    two_pi_i = 2.0j * np.pi

    det_t = np.linalg.det(t)
    inv_t = np.linalg.inv(t)

    v_t = v.T
    bv_t = bv.T
    bh_t = bh.T

    # Row-wise quadratic form v^T t v
    v_rows = np.matrix(v_t)
    quad_v = ((v_rows * np.matrix(t)).A * v_rows.A).sum(1)

    lin_v = np.dot(bv_t, v)
    bv_inv_t = np.dot(bv_t, inv_t)
    bias_quad = np.dot(bv_inv_t, bv)
    bias_w = np.dot(bv_inv_t, w)
    w_inv_t_w = np.dot(np.dot(w.T, inv_t), w)

    exponent = -0.5 * quad_v - lin_v - 0.5 * bias_quad * np.ones(v.shape[1])

    ln_theta_num = RiemannTheta.log_eval(
        (v_t.dot(w) + bh_t) / two_pi_i,
        -q / two_pi_i,
        mode,
        epsilon=RTBM_precision)
    ln_theta_den = RiemannTheta.log_eval(
        (bh_t - bias_w) / two_pi_i,
        (-q + w_inv_t_w) / two_pi_i,
        mode,
        epsilon=RTBM_precision)

    log_norm = np.log(np.sqrt(det_t / (2.0 * np.pi)**(v.shape[0])))
    return log_norm + exponent + ln_theta_num - ln_theta_den
def logtheta_1d(v, q, d):
    """Log of the 1d Riemann-Theta or of its d-th derivative.

    Args:
        v: argument passed (divided by 2*pi*i) to ``RiemannTheta.log_eval``.
        q: matrix whose ``[0, 0]`` entry is the 1d quadratic parameter.
            NOTE: it is modified in place when the cutoff below triggers.
        d (int): number of derivatives to take; ``d <= 0`` means none.

    Returns:
        Result of ``RiemannTheta.log_eval``; for ``d > 0`` shifted by
        ``-d * log(2*pi*i)``.
    """
    two_pi_i = 2.0j * np.pi

    # Cutoff if q is not positive definite
    if np.real(q[0, 0]) <= 0 or np.isnan(q).any():
        print("NaN detected or negative value in phase I: ", q)
        q[0, 0] = np.abs(q[0, 0])

    if d > 0:
        directions = np.ones((1, d))
        return -d * np.log(two_pi_i) + RiemannTheta.log_eval(
            v / two_pi_i,
            -q / two_pi_i,
            epsilon=RTBM_precision,
            derivs=directions)

    return RiemannTheta.log_eval(
        v / two_pi_i,
        -q / two_pi_i,
        epsilon=RTBM_precision)
def rtbm_parts(v, bv, bh, t, w, q, mode=1):
    """Compute P(v) as two separate factors.

    Args:
        v: visible data; ``v.shape[0]`` enters the normalisation and
            ``v.shape[1]`` sizes the broadcast of the constant term
            (presumably one sample per column -- confirm with callers).
        bv, bh: visible / hidden bias terms.
        t, w, q: model matrices (``t`` is inverted and its determinant taken).
        mode: forwarded positionally to ``RiemannTheta.parts_eval``.

    Returns:
        tuple: ``(gaussian_part, theta_part)`` where the first entry is the
        normalised exponential factor and the second is the ratio of the
        two ``RiemannTheta.parts_eval`` results.
    """
    two_pi_i = 2.0j * np.pi

    det_t = np.linalg.det(t)
    inv_t = np.linalg.inv(t)

    v_t = v.T
    bv_t = bv.T
    bh_t = bh.T

    # Row-wise quadratic form v^T t v
    v_rows = np.matrix(v_t)
    quad_v = ((v_rows * np.matrix(t)).A * v_rows.A).sum(1)

    lin_v = np.dot(bv_t, v)
    bv_inv_t = np.dot(bv_t, inv_t)
    bias_quad = np.dot(bv_inv_t, bv)
    bias_w = np.dot(bv_inv_t, w)
    w_inv_t_w = np.dot(np.dot(w.T, inv_t), w)

    exp_factor = np.exp(
        -0.5 * quad_v - lin_v - 0.5 * bias_quad * np.ones(v.shape[1]))

    u_num, v_num = RiemannTheta.parts_eval(
        (v_t.dot(w) + bh_t) / two_pi_i,
        -q / two_pi_i,
        mode,
        epsilon=RTBM_precision)
    u_den, v_den = RiemannTheta.parts_eval(
        (bh_t - bias_w) / two_pi_i,
        (-q + w_inv_t_w) / two_pi_i,
        mode,
        epsilon=RTBM_precision)

    gaussian_part = np.sqrt(det_t / (2.0 * np.pi)**(v.shape[0])) * exp_factor
    theta_part = v_num / v_den * np.exp(u_num - u_den)
    return gaussian_part, theta_part
def show_activation(self, N, bound=2):
    """Plots the Nth activation function on [-bound,+bound].

    Prints a message and does nothing else when ``N`` exceeds the number
    of output nodes.

    Args:
        N (int): the Nth activation function (1-based: entry
            ``[N - 1, N - 1]`` of ``self._q`` is used).
        bound (float): min/max value for the plot.
    """
    if N > self._Nout:
        print("Node does not exist!")
        return

    two_pi_i = 2.0j * np.pi

    # 1000 grid points, scaled by the phase and shaped as a column
    grid = self._phase * np.linspace(-bound, bound, 1000)
    grid = grid.reshape((grid.shape[0], 1))
    omega = np.matrix([[self._q[N - 1, N - 1]]], dtype=complex)

    # Phase 1 uses mode=1; any other phase uses mode=2 and an extra
    # phase factor in front of the derivative.
    prefactor = -1.0 / two_pi_i
    if self._phase == 1:
        theta_mode = 1
    else:
        theta_mode = 2
        prefactor = prefactor * self._phase

    activation = prefactor * RiemannTheta.normalized_eval(
        grid / two_pi_i,
        -omega / two_pi_i,
        mode=theta_mode,
        derivs=[[1]])

    plt.plot(1.0 / self._phase * grid.flatten(), activation.flatten(), "b-")
def factorized_hidden_expectations(vWb, q, mode=1):
    """Compute E(h|v) node by node, using only the diagonal of ``q``.

    Note: off-diagonal entries of ``q`` are never read, so ``q`` is not
    checked for actually being diagonal (for performance).

    Args:
        vWb: 2d input; column ``i`` is fed to hidden node ``i`` and
            ``vWb.shape[0]`` is the length of each result row.
        q: square matrix; ``q[i, i]`` is the parameter of node ``i``.
        mode: forwarded to ``RiemannTheta.normalized_eval``.

    Returns:
        numpy.ndarray: complex array of shape ``(q.shape[0], vWb.shape[0])``
        holding ``[E(h_1|v), E(h_2|v), ...]``.
    """
    two_pi_i = 2.0j * np.pi
    n_hidden = q.shape[0]
    expectations = np.zeros((n_hidden, vWb.shape[0]), dtype=complex)

    for idx in range(n_hidden):
        omega = np.matrix([[q[idx, idx]]], dtype=complex)

        # Cutoff to keep positive definite
        if np.real(omega[0, 0]) <= 0 or np.isnan(omega).any():
            print("NaN detected or negative value: ", omega)
            omega[0, 0] = np.abs(omega[0, 0])

        theta_ratio = RiemannTheta.normalized_eval(
            vWb[:, [idx]] / two_pi_i,
            -omega / two_pi_i,
            mode=mode,
            epsilon=RTBM_precision,
            derivs=[[1]])
        expectations[idx] = -1.0 / two_pi_i * theta_ratio

    return expectations
def make_sample(self, size, epsilon=RTBM_precision):
    """Produces P(v) and P(h) samples for the current RTBM architecture.

    Hidden states are drawn by rejection sampling over the integer points
    returned by ``integer_points_python``; for every accepted hidden state
    one visible sample is drawn from a multivariate normal with covariance
    ``inv(self._t)``.

    Args:
        size (int): number of samples to be generated.
        epsilon (float): threshold for the radius calculation

    Returns:
        numpy.ndarray: sampling of P(v), one row per sample.
        list of numpy.array: sampling of P(h).
    """
    invT = np.linalg.inv(self._t)
    iTW = invT.dot(self._w)

    WTiW = self._w.T.dot(iTW)
    BvTiW = self._bv.T.dot(iTW)

    O = (self._q - WTiW)
    Z = (self._bh.T - BvTiW)
    Z = Z.flatten()

    # Builtin ``complex`` instead of ``np.complex``: the alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    Omega = np.array(-O / (2.0j * np.pi), dtype=complex)
    Y = Omega.imag

    RT = RiemannTheta.eval(Z / (2.0j * np.pi), -O / (2.0j * np.pi))

    # Cholesky factor of the imaginary part (plain sqrt in the 1d case)
    if Y.shape[0] != 1:
        _T = np.linalg.cholesky(Y).T
    else:
        _T = np.sqrt(Y)

    g = len(Z)
    R = radius(epsilon, _T, derivs=[], accuracy_radius=5.)
    S = np.ascontiguousarray(integer_points_python(g, R, _T))

    # Largest P(h) over the candidate points: envelope for the rejection step
    pmax = 0
    for s in S:
        p = rtbm_ph(self, s)
        if p > pmax:
            pmax = p

    # Rejection sampling
    ph = []
    while len(ph) < size:
        U = np.random.randint(0, len(S))
        X = (np.exp(-0.5 * S[U].T.dot(O).dot(S[U]) - Z.dot(S[U])) / RT).real
        J = np.random.uniform()
        if X / pmax > J:
            ph.append(S[U])

    # Draw samples from P(v|h)
    pv = np.zeros(shape=(len(ph), self._bv.shape[0]))
    for i, h in enumerate(ph):
        muh = -np.dot(invT, np.dot(self._w, h.reshape(g, 1)) + self._bv)
        pv[i] = np.random.multivariate_normal(
            mean=muh.flatten(), cov=invT, size=1).flatten()

    return pv, ph
def mean(self):
    """Computes the first moment estimator (mean).

    Returns:
        The mean of the probability distribution (real part of the
        evaluated expression).

    Raises:
        AssertionError: if ``mode`` is not ``Mode.Probability``.
    """
    if self._mode is not self.Mode.Probability:
        # Must be ``raise``: ``assert AssertionError(...)`` asserts a truthy
        # exception instance, never fires, and silently returns None.
        raise AssertionError(
            'Mean for mode %s not implemented' % self._mode)

    invT = np.linalg.inv(self._t)
    BvT = self._bv.T
    BhT = self._bh.T

    BtiTW = np.dot(np.dot(BvT, invT), self._w)
    WtiTW = np.dot(np.dot(self._w.T, invT), self._w)

    # NOTE(review): ``mode`` receives self._mode here while the other
    # RiemannTheta calls in this file pass an int -- confirm this is intended.
    return np.real(-np.dot(invT, self._bv) + 1.0 / (2j * np.pi) * np.dot(
        np.dot(invT, self._w),
        RiemannTheta.normalized_eval(
            (BhT - BtiTW) / (2.0j * np.pi),
            (-self._q + WtiTW) / (2.0j * np.pi),
            mode=self._mode,
            derivs=self._D1)))
def gradient_log_1d_theta_phaseII(v, q, d):
    """Directional log-gradient of the Riemann-Theta, evaluated with mode=2.

    Computes the ratio of the first derivative along unit direction ``d``
    to the function value, scaled by ``-1 / (2*pi*i)``.

    Args:
        v: argument (divided by 2*pi*i before evaluation).
        q: square matrix; ``q.shape[0]`` fixes the direction vector length.
        d (int): index of the direction of the gradient.

    Returns:
        Flattened result array.
    """
    two_pi_i = 2.0j * np.pi

    # Unit vector selecting direction d
    direction = np.zeros(q.shape[0])
    direction[d] = 1

    theta = RiemannTheta(
        v / two_pi_i,
        -q / two_pi_i,
        mode=2,
        epsilon=RTBM_precision)
    dtheta = RiemannTheta(
        v / two_pi_i,
        -q / two_pi_i,
        mode=2,
        epsilon=RTBM_precision,
        derivs=[direction])

    gradient = -(dtheta / theta) / two_pi_i
    return gradient.flatten()
def gradient_log_theta(v, q, d):
    """Directional log-gradient of the Riemann-Theta via log evaluations.

    Both the function and its first derivative along unit direction ``d``
    are obtained with ``RiemannTheta.log_eval`` (mode=0); their ratio is
    recovered as ``exp(log_derivative - log_value)``.

    Args:
        v: argument (divided by 2*pi*i before evaluation).
        q: square matrix; ``q.shape[0]`` fixes the direction vector length.
        d (int): index of the direction of the gradient.

    Returns:
        ``-exp(L - R) / (2*pi*i)``.
    """
    two_pi_i = 2.0j * np.pi

    # Unit vector selecting direction d
    direction = np.zeros(q.shape[0])
    direction[d] = 1

    log_theta = RiemannTheta.log_eval(
        v / two_pi_i,
        -q / two_pi_i,
        mode=0,
        epsilon=RTBM_precision)
    log_dtheta = RiemannTheta.log_eval(
        v / two_pi_i,
        -q / two_pi_i,
        mode=0,
        derivs=[direction],
        epsilon=RTBM_precision)

    return -np.exp(log_dtheta - log_theta) / two_pi_i
def theta_1d(v, q, d):
    """Evaluate the 1d Riemann-Theta or its d-th derivative.

    Args:
        v: argument (divided by 2*pi*i before evaluation).
        q: matrix whose ``[0, 0]`` entry is the 1d quadratic parameter.
            NOTE: ``q[0, 0]`` is overwritten in place with ``1e-5`` when
            its real part is not positive.
        d (int): number of derivatives to take; ``d <= 0`` means none.

    Returns:
        The RiemannTheta value; for ``d > 0`` multiplied by
        ``1 / (2*pi*i)**d``.
    """
    two_pi_i = 2.0j * np.pi

    # Cutoff if q is not positive definite
    if np.real(q[0, 0]) <= 0:
        q[0, 0] = 1e-5

    if d > 0:
        directions = np.ones((1, d))
        return 1.0 / (two_pi_i**d) * RiemannTheta(
            v / two_pi_i,
            -q / two_pi_i,
            epsilon=RTBM_precision,
            derivs=directions)

    return RiemannTheta(
        v / two_pi_i,
        -q / two_pi_i,
        epsilon=RTBM_precision)
def gradient_log_1d_theta_phaseI(v, q, d):
    """Directional log-gradient of the Riemann-Theta, evaluated with mode=1.

    ``v`` is first reduced modulo ``q``; the theta ratio is evaluated on
    the remainder and the integer quotient is subtracted from the result.

    Args:
        v: argument of the theta function.
        q: square matrix; ``q.shape[0]`` fixes the direction vector length.
        d (int): index of the direction of the gradient.

    Returns:
        Flattened result array.
    """
    two_pi_i = 2.0j * np.pi

    # Unit vector selecting direction d
    direction = np.zeros(q.shape[0])
    direction[d] = 1

    # Restrict to unit lattice box
    quotient, remainder = np.divmod(v, q)

    theta = RiemannTheta(
        remainder / two_pi_i,
        -q / two_pi_i,
        mode=1,
        epsilon=RTBM_precision)
    dtheta = RiemannTheta(
        remainder / two_pi_i,
        -q / two_pi_i,
        mode=1,
        epsilon=RTBM_precision,
        derivs=[direction])

    gradient = (-(dtheta / theta) / two_pi_i).flatten()
    return gradient - quotient.flatten()
def rtbm_ph(model, h):
    """Evaluate P(h) of ``model`` for the hidden configuration ``h``.

    Args:
        model: object exposing ``t``, ``w``, ``q``, ``bv`` and ``bh``.
        h: hidden state vector.

    Returns:
        The exponential of the quadratic/linear form in ``h`` divided by
        the Riemann-Theta normalisation obtained from
        ``RiemannTheta.parts_eval`` (mode=1).
    """
    two_pi_i = 2j * np.pi

    inv_t = np.linalg.inv(model.t)
    w_inv_t_w = np.dot(np.dot(model.w.T, inv_t), model.w)
    bias_w = np.dot(np.dot(model.bv.T, inv_t), model.w)
    bh_t = model.bh.T

    # Quadratic and linear coefficients of the exponent in h
    quad_coeff = model.q - w_inv_t_w
    lin_coeff = bh_t - bias_w

    quad_term = np.dot(np.dot(h.T, quad_coeff), h)
    lin_term = np.dot(lin_coeff, h)
    weight = np.exp(-0.5 * quad_term - lin_term)

    log_part, lin_part = RiemannTheta.parts_eval(
        (bh_t - bias_w) / two_pi_i,
        (-model.q + w_inv_t_w) / two_pi_i,
        mode=1,
        epsilon=RTBM_precision)

    return weight / lin_part * np.exp(-log_part)
def factorized_hidden_expectation_backprop(vWb, q, mode=1):
    """Evaluate 1st, 2nd and 3rd order normalized theta derivatives per node.

    Only the diagonal of ``q`` is read; node ``i`` uses ``q[i, i]`` and
    column ``i`` of ``vWb``.

    Args:
        vWb: 2d input with one column per hidden node.
        q: square matrix whose diagonal holds the per-node parameter.
        mode: forwarded to ``RiemannTheta.normalized_eval``.

    Returns:
        numpy.ndarray: complex array of shape
        ``(3, vWb.shape[1], vWb.shape[0])``; the first axis indexes the
        derivative order (1st, 2nd, 3rd).
    """
    two_pi_i = 2.0j * np.pi

    # The three direction lists have different lengths. NumPy >= 1.24
    # refuses to build such a ragged array implicitly (ValueError), so
    # request the object array explicitly; this is the array older NumPy
    # versions produced. It is loop-invariant, hence built once.
    derivs = np.array([[1], [1, 1], [1, 1, 1]], dtype=object)

    Tn = np.zeros((3, vWb.shape[1], vWb.shape[0]), dtype=complex)
    for i in range(vWb.shape[1]):
        O = np.matrix([[q[i, i]]], dtype=complex)
        Tn[:, i, :] = RiemannTheta.normalized_eval(
            vWb[:, [i]] / two_pi_i,
            -O / two_pi_i,
            mode=mode,
            derivs=derivs,
            epsilon=RTBM_precision)
    return Tn
def backprop(self, E):
    """Evaluates and stores the gradients for backpropagation.

    The results are written to ``self._gradBv``, ``self._gradBh``,
    ``self._gradW``, ``self._gradT`` and ``self._gradQ``; nothing is
    returned. The computation reads ``self._X`` and ``self._P``.

    Warning:
        This method only works with ``diagonal_T=True``.

    Args:
        E (numpy.array): the error for backpropagation.

    Raises:
        AssertionError: if ``diagonal_T=False``.
    """
    if self._diagonal_T:
        vWb = np.transpose(self._X).dot(self._w) + self._bh.T
        # Element-wise reciprocal, used in place of a matrix inverse.
        # NOTE(review): presumably relies on how the diagonal T is stored in
        # self._t -- confirm against where self._t is set.
        iT = 1.0 / self._t
        iTW = iT.dot(self._w)

        # Gradients
        # arg1/arg2: theta arguments built from the data term vWb and q;
        # arg3/arg4: theta arguments built from the biases and q - W^T iT W.
        arg1 = vWb / (2.0j * np.pi)
        arg2 = -self._q / (2.0j * np.pi)
        arg3 = (self._bh.T - self._bv.T.dot(iTW)) / (2.0j * np.pi)
        arg4 = -(self._q - self._w.T.dot(iTW)) / (2.0j * np.pi)

        # Prefactors applied to first (coeff1) and second (coeff2) derivatives
        coeff1 = 1.0 / (2.0j * np.pi)
        coeff2 = np.square(coeff1)

        Da = coeff1 * RiemannTheta.normalized_eval(
            arg1, arg2, mode=1, derivs=self._D1)
        Db = coeff1 * RiemannTheta.normalized_eval(
            arg3, arg4, mode=1, derivs=self._D1)

        # Hessians
        DDa = coeff2 * RiemannTheta.normalized_eval(
            arg1, arg2, mode=1, derivs=self._D2)
        DDb = coeff2 * RiemannTheta.normalized_eval(
            arg3, arg4, mode=1, derivs=self._D2)

        # H from DDb
        # Reshaped to the shape of q, with the diagonal entries halved in place
        Hb = DDb.flatten().reshape(self._q.shape)
        np.fill_diagonal(Hb, Hb.diagonal() * 0.5)

        # Grad Bv
        self._gradBv = np.mean(
            E * (self._P *
                 (-self._X - 2.0 * iT.dot(self._bv) + iTW.dot(Db))),
            axis=1)

        # Grad Bh
        self._gradBh = np.mean(E * self._P * (Da - Db), axis=1)

        # Grad W
        # First term is divided by self._X.shape[1]; the second is weighted
        # by the mean of E * P over axis 1.
        self._gradW = (E * self._P * self._X).dot(
            Da.T) / self._X.shape[1] + np.mean(E * self._P, axis=1) * (
                self._bv.T.dot(iT).T.dot(Db.T) - 2 * iTW.dot(Hb))

        # Grad T
        iT2 = np.square(iT)
        self._gradT = np.diag(
            np.mean(
                -0.5 * self._P * self._X**2 * E,
                axis=1)) + np.mean(E * self._P, axis=1) * (
                    0.5 * iT + self._bv**2 * iT2 -
                    self._bv * iT2 * self._w.dot(Db) +
                    iT2 * self._w.dot(Hb).dot(self._w.T))

        # Grad Q
        # Same diagonal halving as applied to Hb above
        self._gradQ = np.mean(-self._P * (DDa - DDb) * E,
                              axis=1).reshape(self._q.shape)
        np.fill_diagonal(self._gradQ, self._gradQ.diagonal() * 0.5)
    else:
        raise AssertionError(
            'Gradients for non-diagonal T not implemented.')