def grad_cd1(self, params, inputs, **kwargs):
    """
    Return a one-step contrastive-divergence style gradient for a batch.

    A single up-down-up pass is run: ``self.H`` is evaluated (and sampled)
    on ``inputs``, ``gauss`` produces a visible reconstruction from the
    hidden sample, and ``self.H`` is evaluated again on that
    reconstruction. Each parameter group's gradient is the
    reconstruction-phase statistic minus the data-phase statistic,
    scaled by ``1/n``.

    Layout of the flat ``params`` vector, as read by this method:

    * ``params[:m_end]``        -- weights, reshaped to ``self.shape``
    * ``params[m_end:m_end+H]`` -- bias handed to ``self.H``
    * ``params[-2*V:-V]``       -- bias handed to ``gauss``
    * ``params[-V:]``           -- ``prec`` (see the "square root of
      precision" comment in the body)

    :param params: flat parameter array; the result has the same shape.
    :param inputs: 2-D array; its first dimension ``n`` is used as the
        normaliser (presumably the batch size, with V columns -- confirm
        against the caller).
    :param kwargs: accepted but not used here.
    :returns: ``g``, an array created with ``gzeros(params.shape)`` and
        filled slice by slice.

    Side effect: when ``self.lmbd > 0`` the attribute ``self.rho_hat`` is
    initialised or updated in place.
    """
    g = gzeros(params.shape)
    n, _ = inputs.shape
    m_end = self.m_end
    V = self.shape[0]
    H = self.shape[1]
    wm = params[:m_end].reshape(self.shape)
    # Last V entries of params, turned into a column via newaxis.
    prec = params[-V:][:, gpu.newaxis]
    # Data phase: h1 is the first output of self.H, h_sampled its sample.
    h1, h_sampled = self.H(inputs, wm=prec*wm, bias=params[m_end:m_end+H], sampling=True)
    # Reconstruction phase: v2 / v_sampled come from gauss on the hidden sample.
    v2, v_sampled = gauss(h_sampled, wm=(wm/prec).T, bias=params[-(2*V):-V], prec=prec.T, sampling=True)
    # Second hidden pass is driven by v2 (not v_sampled) and is not sampled.
    h2, _ = self.H(v2, wm=prec*wm, bias=params[m_end:m_end+H])
    # Debug output (Python 2 print statement), left disabled:
    #print h1[0,0], h_sampled[0,0], v2[0,0], v_sampled[0,0]
    # Note the negative sign: the gradient is
    # supposed to point into 'wrong' direction.
    # Weight gradient: reconstruction term minus data term, averaged over n,
    # plus the self.l2-weighted penalty on the weights.
    g[:m_end] = -gdot(inputs.T*prec, h1).ravel()
    g[:m_end] += gdot(v_sampled.T*prec, h2).ravel()
    g[:m_end] *= 1./n
    g[:m_end] += self.l2*params[:m_end]
    # Gradient for the bias used by self.H.
    g[m_end:m_end+H] = -h1.sum(axis=0)
    g[m_end:m_end+H] += h2.sum(axis=0)
    g[m_end:m_end+H] *= 1./n
    # Gradient for the bias used by gauss, additionally scaled by prec**2.
    g[-2*V:-V] = -inputs.sum(axis=0)
    g[-2*V:-V] += v_sampled.sum(axis=0)
    g[-2*V:-V] *= 1./n
    g[-2*V:-V] *= (prec**2).T
    # Debug output, left disabled:
    #print gsum(g[:m_end]**2), gsum(g[m_end:m_end+H]**2), gsum(g[-2*V:-V]**2)
    # Gradient for square root of precision
    g[-V:] = -gsum(2*prec.T*inputs*(params[-2*V:-V] - inputs/2), axis=0) + gsum(gdot(inputs.T, h1)*wm, axis=1)
    g[-V:] += (gsum(2*prec.T*v_sampled*(params[-2*V:-V] - v_sampled/2), axis=0) + gsum(gdot(v_sampled.T, h2)*wm, axis=1))
    g[-V:] *= 1./n
    # Debug output, left disabled:
    #print gsum(g[-V:]**2)
    # Optional extra term, active only for a positive self.lmbd
    # (presumably a sparsity penalty with target self.rho -- confirm).
    if self.lmbd > 0.:
        # self.rho_hat tracks the per-column mean of h1: initialised on first
        # use, afterwards blended in place with weights 0.9 (old) / 0.1 (new).
        if self.rho_hat is None:
            self.rho_hat = h1.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * h1.mean(axis=0)
        # NOTE(review): this divides by rho_hat*(1-rho_hat); entries of
        # rho_hat equal to 0 or 1 would make the denominator zero.
        dKL_drho_hat = (self.rho - self.rho_hat)/(self.rho_hat*(1-self.rho_hat))
        # h*(1-h) is the derivative of a logistic sigmoid expressed through its
        # output -- assumes self.H is sigmoidal; TODO confirm.
        h1_1mh1 = h1*(1 - h1)
        # NOTE(review): self.lmbd/n is a true division only if lmbd is a
        # float (or under Python 3) -- confirm the type of self.lmbd.
        g[m_end:m_end+H] -= self.lmbd/n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
        g[:m_end] -= self.lmbd/n * (gdot(inputs.T * prec, h1_1mh1) * dKL_drho_hat).ravel()
    # Disabled global sign flip:
    #g[:] = -g[:]
    return g
def reconstruction(self, params, inputs, **kwargs):
    """
    Score one stochastic reconstruction of ``inputs``.

    The flat ``params`` vector is unpacked into weights, the two bias
    slices and ``prec``; ``self.H`` is evaluated and sampled on
    ``inputs`` and ``gauss`` produces a sampled visible reconstruction
    from that hidden sample.

    :param params: flat parameter array (weights first, ``prec`` in the
        last ``self.shape[0]`` entries).
    :param inputs: data handed to ``self.H``.
    :param kwargs: accepted but not used here.
    :returns: ``np.array`` with two entries -- the summed squared
        difference between ``inputs`` and the sampled reconstruction,
        and ``self.lmbd`` times the sum over the first output of
        ``self.H``.
    """
    w_stop = self.m_end
    n_vis = self.shape[0]
    n_hid = self.shape[1]

    # Unpack the flat parameter vector once, up front.
    weights = params[:w_stop].reshape(self.shape)
    precision = params[-n_vis:][:, gpu.newaxis]  # column via newaxis
    hid_bias = params[w_stop:w_stop + n_hid]
    vis_bias = params[-(2 * n_vis):-n_vis]

    # Up pass (with sampling), then a sampled pass back down.
    hid_act, hid_sample = self.H(inputs, wm=precision * weights,
                                 bias=hid_bias, sampling=True)
    _, vis_sample = gauss(hid_sample, wm=(weights / precision).T,
                          bias=vis_bias, prec=precision.T, sampling=True)

    squared_error = gsum((inputs - vis_sample)**2)
    activation_total = hid_act.sum()
    return np.array([squared_error, self.lmbd * activation_total])
def reconstruction(self, params, inputs, **kwargs):
    """
    Score one stochastic reconstruction of ``inputs``.

    The flat ``params`` vector is unpacked into weights, the two bias
    slices and ``prec``; ``self.H`` is evaluated and sampled on
    ``inputs`` and ``gauss`` produces a sampled visible reconstruction
    from that hidden sample.

    :param params: flat parameter array (weights first, ``prec`` in the
        last ``self.shape[0]`` entries).
    :param inputs: data handed to ``self.H``.
    :param kwargs: accepted but not used here.
    :returns: ``np.array`` with two entries -- the summed squared
        difference between ``inputs`` and the sampled reconstruction,
        and ``self.lmbd`` times the sum over the first output of
        ``self.H``.
    """
    w_stop = self.m_end
    n_vis = self.shape[0]
    n_hid = self.shape[1]

    # Unpack the flat parameter vector once, up front.
    weights = params[:w_stop].reshape(self.shape)
    precision = params[-n_vis:][:, gpu.newaxis]  # column via newaxis
    hid_bias = params[w_stop:w_stop + n_hid]
    vis_bias = params[-(2 * n_vis):-n_vis]

    # Up pass (with sampling), then a sampled pass back down.
    hid_act, hid_sample = self.H(inputs, wm=precision * weights,
                                 bias=hid_bias, sampling=True)
    _, vis_sample = gauss(hid_sample, wm=(weights / precision).T,
                          bias=vis_bias, prec=precision.T, sampling=True)

    squared_error = gsum((inputs - vis_sample)**2)
    activation_total = hid_act.sum()
    return np.array([squared_error, self.lmbd * activation_total])
def grad_cd1(self, params, inputs, **kwargs):
    """
    Return a one-step contrastive-divergence style gradient for a batch.

    A single up-down-up pass is run: ``self.H`` is evaluated (and sampled)
    on ``inputs``, ``gauss`` produces a visible reconstruction from the
    hidden sample, and ``self.H`` is evaluated again on that
    reconstruction. Each parameter group's gradient is the
    reconstruction-phase statistic minus the data-phase statistic,
    scaled by ``1/n``.

    Layout of the flat ``params`` vector, as read by this method:

    * ``params[:m_end]``        -- weights, reshaped to ``self.shape``
    * ``params[m_end:m_end+H]`` -- bias handed to ``self.H``
    * ``params[-2*V:-V]``       -- bias handed to ``gauss``
    * ``params[-V:]``           -- ``prec`` (see the "square root of
      precision" comment in the body)

    :param params: flat parameter array; the result has the same shape.
    :param inputs: 2-D array; its first dimension ``n`` is used as the
        normaliser (presumably the batch size, with V columns -- confirm
        against the caller).
    :param kwargs: accepted but not used here.
    :returns: ``g``, an array created with ``gzeros(params.shape)`` and
        filled slice by slice.

    Side effect: when ``self.lmbd > 0`` the attribute ``self.rho_hat`` is
    initialised or updated in place.
    """
    g = gzeros(params.shape)
    n, _ = inputs.shape
    m_end = self.m_end
    V = self.shape[0]
    H = self.shape[1]
    wm = params[:m_end].reshape(self.shape)
    # Last V entries of params, turned into a column via newaxis.
    prec = params[-V:][:, gpu.newaxis]
    # Data phase: h1 is the first output of self.H, h_sampled its sample.
    h1, h_sampled = self.H(inputs, wm=prec * wm, bias=params[m_end:m_end + H], sampling=True)
    # Reconstruction phase: v2 / v_sampled come from gauss on the hidden sample.
    v2, v_sampled = gauss(h_sampled, wm=(wm / prec).T, bias=params[-(2 * V):-V], prec=prec.T, sampling=True)
    # Second hidden pass is driven by v2 (not v_sampled) and is not sampled.
    h2, _ = self.H(v2, wm=prec * wm, bias=params[m_end:m_end + H])
    # Debug output (Python 2 print statement), left disabled:
    #print h1[0,0], h_sampled[0,0], v2[0,0], v_sampled[0,0]
    # Note the negative sign: the gradient is
    # supposed to point into 'wrong' direction.
    # Weight gradient: reconstruction term minus data term, averaged over n,
    # plus the self.l2-weighted penalty on the weights.
    g[:m_end] = -gdot(inputs.T * prec, h1).ravel()
    g[:m_end] += gdot(v_sampled.T * prec, h2).ravel()
    g[:m_end] *= 1. / n
    g[:m_end] += self.l2 * params[:m_end]
    # Gradient for the bias used by self.H.
    g[m_end:m_end + H] = -h1.sum(axis=0)
    g[m_end:m_end + H] += h2.sum(axis=0)
    g[m_end:m_end + H] *= 1. / n
    # Gradient for the bias used by gauss, additionally scaled by prec**2.
    g[-2 * V:-V] = -inputs.sum(axis=0)
    g[-2 * V:-V] += v_sampled.sum(axis=0)
    g[-2 * V:-V] *= 1. / n
    g[-2 * V:-V] *= (prec**2).T
    # Debug output, left disabled:
    #print gsum(g[:m_end]**2), gsum(g[m_end:m_end+H]**2), gsum(g[-2*V:-V]**2)
    # Gradient for square root of precision
    g[-V:] = -gsum(2 * prec.T * inputs * (params[-2 * V:-V] - inputs / 2), axis=0) + gsum(gdot(inputs.T, h1) * wm, axis=1)
    g[-V:] += (gsum(2 * prec.T * v_sampled * (params[-2 * V:-V] - v_sampled / 2), axis=0) + gsum(gdot(v_sampled.T, h2) * wm, axis=1))
    g[-V:] *= 1. / n
    # Debug output, left disabled:
    #print gsum(g[-V:]**2)
    # Optional extra term, active only for a positive self.lmbd
    # (presumably a sparsity penalty with target self.rho -- confirm).
    if self.lmbd > 0.:
        # self.rho_hat tracks the per-column mean of h1: initialised on first
        # use, afterwards blended in place with weights 0.9 (old) / 0.1 (new).
        if self.rho_hat is None:
            self.rho_hat = h1.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * h1.mean(axis=0)
        # NOTE(review): this divides by rho_hat*(1-rho_hat); entries of
        # rho_hat equal to 0 or 1 would make the denominator zero.
        dKL_drho_hat = (self.rho - self.rho_hat) / (self.rho_hat * (1 - self.rho_hat))
        # h*(1-h) is the derivative of a logistic sigmoid expressed through its
        # output -- assumes self.H is sigmoidal; TODO confirm.
        h1_1mh1 = h1 * (1 - h1)
        # NOTE(review): self.lmbd / n is a true division only if lmbd is a
        # float (or under Python 3) -- confirm the type of self.lmbd.
        g[m_end:m_end + H] -= self.lmbd / n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
        g[:m_end] -= self.lmbd / n * (gdot(inputs.T * prec, h1_1mh1) * dKL_drho_hat).ravel()
    # Disabled global sign flip:
    #g[:] = -g[:]
    return g