def reconstruction(self, params, inputs, **kwargs):
    """Report reconstruction errors for one pass through the model.

    The flat vector ``params`` is cut, in this order, into the x-to-factor,
    y-to-factor and factor-to-hidden weight matrices followed by the
    hidden, x and y bias vectors, using the size/shape attributes stored
    on ``self``.  The element-wise product of the x and y factor
    projections is handed to ``bernoulli``; its first output is projected
    back onto the factors, and x and y are then rebuilt one after the
    other through ``self.V``.  Which of the two is rebuilt first is decided
    by a fresh ``np.random.rand()`` draw on every call, so the result is
    not deterministic.

    Arguments:
        params -- flat array holding all weights and biases.
        inputs -- pair ``(x, y)``; ``x`` has to be two-dimensional.
        kwargs -- accepted, not used.

    Returns an ``np.array`` with five entries:
        ``gsum`` of the squared residual ``x - x1``,
        ``gsum`` of the squared residual ``y - y1``,
        ``self.lmbd`` times the sum over the first output of ``bernoulli``,
        ``self.avg_nxyf`` and ``self.avg_nfh`` (read, not modified here).
    """
    x, y = inputs
    # Unpacking requires x to be 2-D; neither extent is used below.
    _num_rows, _num_cols = x.shape

    # Boundaries of the individual pieces inside the flat vector.
    xf_end = self.xf_sz
    xy_end = self._cum_xy
    xyh_end = self._cum_xyh
    w_end = self.size
    y_bias_len = self.shape[0][1]

    w_xf = params[:xf_end].reshape(self.xfshape)
    w_yf = params[xf_end:xy_end].reshape(self.yfshape)
    w_fh = params[xy_end:xyh_end].reshape(self.fhshape)
    b_h = params[xyh_end:w_end]
    b_x = params[w_end:-y_bias_len]
    b_y = params[-y_bias_len:]

    fx = gdot(x, w_xf)
    fy = gdot(y, w_yf)

    # The second output of bernoulli is not needed in this method.
    # NOTE(review): presumably h holds hidden activation probabilities --
    # confirm against bernoulli's definition.
    h, _ = bernoulli(fx * fy, wm=w_fh, bias=b_h, sampling=True)
    hidden_total = h.sum()
    fh = gdot(h, w_fh.T)

    y_first = np.random.rand() > 0.5
    if not y_first:
        # Rebuild x from (y, h), refresh the x factors in place, then
        # rebuild y from the refreshed x factors.
        x1, _ = self.V(fy * fh, wm=w_xf.T, bias=b_x)
        fx[:] = gdot(x1, w_xf)
        y1, _ = self.V(fx * fh, wm=w_yf.T, bias=b_y)
    else:
        # Rebuild y from (x, h), refresh the y factors in place, then
        # rebuild x from the refreshed y factors.
        y1, _ = self.V(fx * fh, wm=w_yf.T, bias=b_y)
        fy[:] = gdot(y1, w_yf)
        x1, _ = self.V(fy * fh, wm=w_xf.T, bias=b_x)

    return np.array([
        gsum((x - x1)**2),
        gsum((y - y1)**2),
        self.lmbd * hidden_total,
        self.avg_nxyf,
        self.avg_nfh,
    ])
def cd1_3way_grad(self, params, inputs, **kwargs):
    """Build a gradient estimate from data and one reconstruction step.

    The flat vector ``params`` is cut, in this order, into the x-to-factor,
    y-to-factor and factor-to-hidden weight matrices followed by the
    hidden, x and y bias vectors.  The method then

    1. rescales the three weight matrices in place so that their
       column norms (x/y-to-factor) resp. row norms (factor-to-hidden)
       equal running averages kept on ``self``;
    2. accumulates statistics computed from the data ``(x, y)`` with a
       negative sign;
    3. rebuilds x and y through ``self.V`` (order chosen by a fresh
       ``np.random.rand()`` draw), recomputes the hidden units and adds
       the same statistics computed from the rebuilt values;
    4. divides every piece of the result by the number of rows of ``x``.

    Side effects: ``self.avg_nxyf`` and ``self.avg_nfh`` are updated
    (decay 0.95, new contribution 0.05), and the weight slices are scaled
    with ``*=``.
    NOTE(review): whether that scaling writes through to ``params``
    depends on slicing/reshape returning views -- confirm for the array
    type in use.

    Arguments:
        params -- flat array holding all weights and biases.
        inputs -- pair ``(x, y)``; ``x`` has to be two-dimensional.
        kwargs -- accepted, not used.

    Returns an array created by ``gzeros(params.shape)`` laid out like
    ``params``.
    """
    grad = gzeros(params.shape)
    x, y = inputs
    n, _ = x.shape

    # Boundaries of the individual pieces inside the flat vectors.
    xf_end = self.xf_sz
    xy_end = self._cum_xy
    xyh_end = self._cum_xyh
    w_end = self.size
    y_bias_len = self.shape[0][1]

    w_xf = params[:xf_end].reshape(self.xfshape)
    w_yf = params[xf_end:xy_end].reshape(self.yfshape)
    w_fh = params[xy_end:xyh_end].reshape(self.fhshape)
    b_h = params[xyh_end:w_end]
    b_x = params[w_end:-y_bias_len]
    b_y = params[-y_bias_len:]

    # --- weight normalisation -------------------------------------------
    # Column norms of both factor matrices; SMALL keeps the later
    # division away from zero.
    col_norm_xf = gpu.sqrt((w_xf * w_xf).sum(axis=0)) + SMALL
    col_norm_yf = gpu.sqrt((w_yf * w_yf).sum(axis=0)) + SMALL
    mean_norm_xyf = (col_norm_xf.mean() + col_norm_yf.mean())/2.

    self.avg_nxyf *= 0.95
    self.avg_nxyf += (0.05 * mean_norm_xyf)
    w_xf *= (self.avg_nxyf / col_norm_xf)
    w_yf *= (self.avg_nxyf / col_norm_yf)

    # Row norms of the factor-to-hidden matrix, handled the same way.
    row_norm_fh = gpu.sqrt((w_fh * w_fh).sum(axis=1)) + SMALL
    self.avg_nfh *= 0.95
    self.avg_nfh += (0.05 * row_norm_fh.mean())
    w_fh *= (self.avg_nfh / row_norm_fh[:, gpu.newaxis])

    # --- statistics from the data (entered with a minus sign) ------------
    fx = gdot(x, w_xf)
    fy = gdot(y, w_yf)
    fxy = fx * fy

    h, h_sampled = bernoulli(fxy, wm=w_fh, bias=b_h, sampling=True)
    # The second bernoulli output drives the factor projection and the
    # weight terms; the first one is used for the hidden bias term.
    fh = gdot(h_sampled, w_fh.T)

    grad[:xf_end] = -gdot(x.T, fy * fh).ravel()
    grad[xf_end:xy_end] = -gdot(y.T, fx * fh).ravel()
    grad[xy_end:xyh_end] = -gdot(fxy.T, h_sampled).ravel()
    grad[xyh_end:w_end] = -h.sum(axis=0)
    grad[w_end:-y_bias_len] = -x.sum(axis=0)
    grad[-y_bias_len:] = -y.sum(axis=0)

    # --- rebuild x and y, order picked at random --------------------------
    y_first = np.random.rand() > 0.5
    if not y_first:
        # x from (y, h) first, then y from the refreshed x factors.
        x1, _ = self.V(fy * fh, wm=w_xf.T, bias=b_x)
        fx[:] = gdot(x1, w_xf)
        y1, _ = self.V(fx * fh, wm=w_yf.T, bias=b_y)
        fy[:] = gdot(y1, w_yf)
    else:
        # y from (x, h) first, then x from the refreshed y factors.
        y1, _ = self.V(fx * fh, wm=w_yf.T, bias=b_y)
        fy[:] = gdot(y1, w_yf)
        x1, _ = self.V(fy * fh, wm=w_xf.T, bias=b_x)
        fx[:] = gdot(x1, w_xf)

    # Hidden units and factor projections for the rebuilt pair; the
    # existing buffers are overwritten in place.
    fxy[:] = fx * fy
    h1, _ = bernoulli(fxy, wm=w_fh, bias=b_h)
    fh[:] = gdot(h1, w_fh.T)

    # --- statistics from the rebuilt pair, then per-row averaging --------
    inv_n = 1. / n

    grad[:xf_end] += gdot(x1.T, fy * fh).ravel()
    grad[:xf_end] *= inv_n

    grad[xf_end:xy_end] += gdot(y1.T, fx * fh).ravel()
    grad[xf_end:xy_end] *= inv_n

    grad[xy_end:xyh_end] += gdot(fxy.T, h1).ravel()
    grad[xy_end:xyh_end] *= inv_n

    grad[xyh_end:w_end] += h1.sum(axis=0)
    grad[xyh_end:w_end] *= inv_n

    grad[w_end:-y_bias_len] += x1.sum(axis=0)
    grad[w_end:-y_bias_len] *= inv_n

    grad[-y_bias_len:] += y1.sum(axis=0)
    grad[-y_bias_len:] *= inv_n

    return grad