def sort2d(A, num_swaps=10000):
    """Reorder dictionary columns so similar ones end up adjacent on a 2D grid.

    The K columns of ``A`` are laid out row by row on a Y-by-Y grid with
    wrap-around edges, where ``Y = ceil(sqrt(K))``.  Random pairs of columns
    are swapped whenever the swap increases the summed products between
    each of the two columns and the columns sitting at its four grid
    neighbours.

    Args:
        A: dictionary with one element per column.  A copy is reordered; the
            caller's object is not modified.
        num_swaps: number of random swap attempts (10000, the value that
            used to be hard-coded, by default).

    Returns:
        The reordered copy of ``A`` (as returned by ``mtr``).

    >>> assert_equal((36, 169), sort2d(randn(36, 169)).shape)
    sorting dictionaries...
    done.
    """
    print("sorting dictionaries...")
    A = mtr(A.copy())
    K = A.shape[1]

    # Side length of the square grid the K columns are laid out on.
    Y = int(ceil(sqrt(K)))

    def _wrap(k):
        # When K is not a perfect square the last grid rows are (partly)
        # empty.  Walk up the same grid column until an occupied cell is
        # reached.  A single "k - Y" step is not always enough: with K = 5
        # and Y = 3, cell 8 would map to 5, which is not a valid column.
        while k >= K:
            k -= Y
        return k

    # Neighbour graph: (up, left, right, down) for every grid cell, wrapping
    # around the edges.  Cell (x, y) has linear index y * Y + x.
    neighbors = [[_wrap(((y - 1) % Y) * Y + x),
                  _wrap(y * Y + ((x - 1) % Y)),
                  _wrap(y * Y + ((x + 1) % Y)),
                  _wrap(((y + 1) % Y) * Y + x)]
                 for y in range(Y) for x in range(Y)]

    # Do random swaps and keep those that improve the neighbourhood score.
    for _ in range(num_swaps):
        a = randint(K)
        b = randint(K)
        na = neighbors[a]
        nb = neighbors[b]
        # E0: score with the columns where they are; E1: score if swapped.
        E0 = sum(A[:, a].T * A[:, na] + A[:, b].T * A[:, nb])
        E1 = sum(A[:, a].T * A[:, nb] + A[:, b].T * A[:, na])
        if E1 > E0:
            A[:, [a, b]] = A[:, [b, a]]

    print("done.")
    return A
def __init__(self, name, generator, selectors = None, encoders = None, updaters = None, designs = None, random_init = False, **kwds):
    """Set up the designs, their initial dictionaries and empty statistics.

    Args:
        name: stored as ``self.name``.
        generator: data source; if it has a ``dictionary`` attribute, its
            ``A`` is kept as ``self.Astar`` (otherwise ``self.Astar`` is None).
        selectors, encoders, updaters: when ``designs`` is None, one
            ``Design`` is created for every combination of the three.
        designs: ready-made designs; each gets ``experiment`` set to self.
        random_init: if true, start from ``Random(p, K, sort=False).A``;
            otherwise call ``generator.generate(-1)`` and start from the
            normalized first ``generator.K`` columns of ``generator.X``.
        **kwds: accepted but not used by this method.
    """
    self.name = name
    self.generator = generator
    if hasattr(self.generator, 'dictionary'):
        self.Astar = self.generator.dictionary.A
    else:
        self.Astar = None

    if designs is not None:
        # Adopt the designs supplied by the caller.
        for existing in designs:
            existing.experiment = self
    else:
        # Build the full cross product of the supplied components.
        designs = []
        for sel, enc, upd in itertools.product(selectors, encoders, updaters):
            designs.append(Design(self, sel, enc, upd))
    self.designs = designs

    # Initial dictionary, shared (as separate copies) by all designs.
    if not random_init:
        generator.generate(-1)
        samples = generator.X
        initial = normalize(samples[:, :generator.K])
    else:
        initial = Random(self.generator.p, self.generator.K, sort=False).A

    self.As = []
    for _ in designs:
        self.As.append(mtr(initial.copy()))
    self.Xs = []
    self.stats = []
    for _ in designs:
        self.stats.append(pandas.DataFrame())
    self.all_stats = pandas.DataFrame()
    self.itr = 0
    self.elapsed = 0.0
def generate(self, itr):
    """Draw new activations and synthesize noisy examples from them.

    Stores the activations in ``self.S`` and the noisy examples in
    ``self.X``, then forwards the per-column signal amplitude, the noise
    and ``itr`` to ``self._collect_generator_stats``.
    """
    self.S = self.generate_S()
    clean = self.dictionary.A * self.S

    # Root-mean-square amplitude of every column of the clean signal.
    amplitude = sqrt(mean(multiply(clean, clean), axis=0))

    # Noise scaled by the average amplitude and by sigma.
    n_rows = clean.shape[0]
    n_cols = clean.shape[1]
    noise = randn(n_rows, n_cols) * mean(amplitude) * self.sigma

    self.X = mtr(clean + noise)
    self._collect_generator_stats(amplitude, noise, itr)
def collect_stats(generator, S, oldA, A, idx, itr):
    """Compute statistics for the current dictionary and activations.

    Args:
        generator: provides ``X``; optionally ``Xsnr``, and ``dictionary``
            together with ``S`` (the true dictionary and activations).
        S: activations of all examples in ``generator.X`` under ``A``.
        oldA: dictionary before the update.
        A: dictionary after the update.
        idx: indices of the examples that were picked for the update.
        itr: iteration number, forwarded to ``_best_match``.

    Returns:
        ``(stats, newA)`` where ``stats`` is a dict of scalar statistics and
        ``newA`` is ``A`` as returned by ``_best_match`` when the generator
        has a true dictionary, or a copy of ``A`` otherwise.
    """
    X = generator.X
    picked = X[:, idx]

    # Reconstruction residuals over all examples and over the picked ones.
    residual_all = X - A * S
    residual_picked = picked - A * S[:, idx]
    delta_A = A - oldA
    activity = mean(S, axis=1)

    # Gram matrix of (at most) the first 1000 picked examples.
    head = picked[:, :1000]
    gram = head.T * head

    stats = {}
    stats['loss_all'] = mean(multiply(residual_all, residual_all))
    stats['loss_sampled'] = mean(multiply(residual_picked, residual_picked))
    stats['diff_A'] = mean(multiply(delta_A, delta_A))
    stats['std_S'] = std(activity)
    stats['mean_S'] = mean(activity)
    stats['cv'] = std(activity) / mean(activity)
    stats['mean_Xp_dist'] = mean(diag(gram)) - mean(gram)
    stats['vqd'] = _vqd(generator, idx)

    if hasattr(generator, 'Xsnr'):
        snr = asarray(generator.Xsnr).squeeze()
        stats['mean_Xsnr'] = mean(snr)
        stats['std_Xsnr'] = std(snr)
        stats['mean_Xsnr_p'] = mean(snr[idx])
        stats['std_Xsnr_p'] = std(snr[idx])

    if not hasattr(generator, 'dictionary'):
        return stats, mtr(A.copy())

    # Distance to the true dictionary / activations after matching.
    Astar = generator.dictionary.A
    newA, newS = _best_match(Astar, A, S, itr)
    gap_A = Astar - newA
    stats['dist_A'] = mean(multiply(gap_A, gap_A))
    gap_S = generator.S - newS
    stats['dist_S'] = mean(multiply(gap_S, gap_S))
    return stats, newA
def collect_stats(generator, S, oldA, A, idx, itr):
    """Compute statistics for the current dictionary and activations.

    Args:
        generator: provides ``X``; optionally ``Xsnr``, and ``dictionary``
            together with ``S`` (the true dictionary and activations).
        S: activations of all examples in ``generator.X`` under ``A``.
        oldA: dictionary before the update.
        A: dictionary after the update.
        idx: indices of the examples that were picked for the update.
        itr: iteration number, forwarded to ``_best_match``.

    Returns:
        ``(stats, newA)`` where ``stats`` is a dict of scalar statistics and
        ``newA`` is ``A`` as returned by ``_best_match`` when the generator
        has a true dictionary, or a copy of ``A`` otherwise.
    """
    X = generator.X
    picked = X[:, idx]

    # Reconstruction residuals over all examples and over the picked ones.
    residual_all = X - A * S
    residual_picked = picked - A * S[:, idx]
    delta_A = A - oldA
    activity = mean(S, axis=1)

    # Gram matrix of (at most) the first 1000 picked examples.
    head = picked[:, :1000]
    gram = head.T * head

    stats = {}
    stats['loss_all'] = mean(multiply(residual_all, residual_all))
    stats['loss_sampled'] = mean(multiply(residual_picked, residual_picked))
    stats['diff_A'] = mean(multiply(delta_A, delta_A))
    stats['std_S'] = std(activity)
    stats['mean_S'] = mean(activity)
    stats['cv'] = std(activity) / mean(activity)
    stats['mean_Xp_dist'] = mean(diag(gram)) - mean(gram)
    stats['vqd'] = _vqd(generator, idx)

    if hasattr(generator, 'Xsnr'):
        snr = asarray(generator.Xsnr).squeeze()
        stats['mean_Xsnr'] = mean(snr)
        stats['std_Xsnr'] = std(snr)
        stats['mean_Xsnr_p'] = mean(snr[idx])
        stats['std_Xsnr_p'] = std(snr[idx])

    if not hasattr(generator, 'dictionary'):
        return stats, mtr(A.copy())

    # Distance to the true dictionary / activations after matching.
    Astar = generator.dictionary.A
    newA, newS = _best_match(Astar, A, S, itr)
    gap_A = Astar - newA
    stats['dist_A'] = mean(multiply(gap_A, gap_A))
    gap_S = generator.S - newS
    stats['dist_S'] = mean(multiply(gap_S, gap_S))
    return stats, newA
def generate_random(self):
    """Cut ``self.N`` patches at random positions out of random images.

    Each patch is ``self.p`` by ``self.p`` and is stored as one column of
    the result, so ``self.P`` has to equal the number of pixels in a patch.
    Patches keep a margin of 4 pixels from the top/left image border; the
    first image dimension is used as the size for both axes.

    Returns:
        ``(X, None, None)`` with ``X`` of shape ``(self.P, self.N)``.
    """
    image_size, _, num_images = self.images.shape
    margin = 4
    # Number of admissible offsets along one axis.
    span = image_size - self.p - 2 * margin

    patches = mtr(zeros((self.P, self.N)))
    for col in range(self.N):
        # Draw order matters for reproducibility: row, column, then image.
        top = margin + randint(span)
        left = margin + randint(span)
        which = randint(num_images)
        patch = self.images[top:(top + self.p), left:(left + self.p), which]
        patches[:, col] = patch.reshape([self.P, 1])
    return patches, None, None
def update(self, X, A, itr):
    """Take ``self.num_iter`` normalized gradient steps on the dictionary.

    Every step re-encodes ``X`` with the current dictionary, equalizes the
    activations, moves ``A`` against the reconstruction-error gradient and
    re-normalizes it.  ``self.eta`` is the step size: either a number or
    something callable with the iteration number.

    Returns:
        The updated dictionary.
    """
    n_atoms = A.shape[1]
    for _ in range(self.num_iter):
        codes = self.encoder.encode(X, A)
        codes = equalize_activities(codes, self.eq_power)

        # Gradient of the reconstruction error, averaged over the examples.
        residual = A * codes - X
        gradient = residual * codes.T / codes.shape[1]

        if hasattr(self.eta, '__call__'):
            rate = self.eta(itr)
        else:
            rate = self.eta

        stepped = A - rate / n_atoms * gradient
        A = mtr(normalize(stepped))
    return A
def update(self, X, A, itr):
    """Update the dictionary with one ``trainDL`` call.

    ``A`` is passed as the initial dictionary ``D`` with a batch size of
    1000; entries of ``self.param`` override these defaults.  The model
    returned by ``trainDL`` is kept in ``self.model`` and handed back to it
    on the next call.  ``itr`` is not used.

    Returns:
        The normalized dictionary returned by ``trainDL``.
    """
    # Defaults first (alternative batch size: X.shape[1]), then overrides.
    options = {'D': A, 'batchsize': 1000}
    options.update(self.param)

    if self.model is not None:
        # Continue from the state of the previous call.
        learned, self.model = trainDL(X, return_model = True, model = self.model, **options)
    else:
        learned, self.model = trainDL(X, return_model = True, **options)
    return mtr(normalize(learned))
def generate_sliding(self):
    """Fill the example matrix with columns taken from consecutive images.

    Starting at ``self.image_idx``, each image is converted to columns with
    ``self._im2col(image, self.p)`` and the columns are appended until
    ``self.N`` examples are collected.  ``self.image_idx`` advances
    cyclically after every image, so the next call continues with the
    following image.  Surplus columns of the last image are discarded.

    Returns:
        ``(X, None, None)`` with ``X`` of shape ``(self.P, self.N)``.

    Raises:
        ValueError: if a full pass over all images yields no columns at
            all (the loop could otherwise never finish).
    """
    _, _, num_images = self.images.shape
    X = mtr(zeros((self.P, self.N)))
    n = 0
    empty_streak = 0  # consecutive images that contributed no columns
    while n < self.N:
        im = self._im2col(self.images[:, :, self.image_idx].squeeze(), self.p)
        num_cols = im.shape[1]

        if num_cols == 0:
            empty_streak += 1
            if empty_streak >= num_images:
                raise ValueError(
                    "no image yields any column for patch size %r" % (self.p,))
        else:
            empty_streak = 0

        # Copy as many columns as still fit into X.
        s = min(self.N, n + num_cols)
        X[:, n:s] = im[:, :(s - n)]
        self.image_idx = (self.image_idx + 1) % num_images
        n += num_cols
    return X, None, None
def equalize_activities(S, eq_power = .5):
    """Rescale the rows of S so that their mean activities become more alike.

    With ``m`` the mean absolute value of each row, every row is multiplied
    by ``(mean(m) / m) ** eq_power``.  ``eq_power = 0`` leaves S unscaled;
    the closer ``eq_power`` is to 1, the more equalization takes place.

    Rows whose mean absolute activity is below 1e-12 ("dead" rows) are first
    refilled with random non-negative values so the scale factor stays
    finite.  NOTE: when only some rows are dead they are refilled in place,
    i.e. the caller's ``S`` is modified; when all rows are dead a new random
    matrix is used instead.

    Args:
        S: activations, one row per dictionary element.  Row means must keep
            shape ``(rows, 1)`` (as numpy matrices do); this is asserted.
        eq_power: equalization strength, see above.

    Returns:
        The rescaled activations (as returned by ``mtr``).
    """
    m = mean(abs(S), axis=1)
    assert m.shape == (S.shape[0], 1)
    dead_idx = m < 1e-12
    if any(dead_idx):
        # Fill zero activations with random activations centered around the mean
        if all(dead_idx):
            S = asmatrix(abs(randn(S.shape[0], S.shape[1])))
        else:
            # Index with the 1-D row positions only.  Passing the whole
            # (rows, cols) tuple from nonzero() as the first index is read as
            # a 2-row index array and also overwrites row 0, because the
            # column positions are all zero.
            dead_rows = nonzero(asarray(dead_idx).ravel())[0]
            S[dead_rows, :] = abs(mean(m) + std(S) * randn(dead_rows.size, S.shape[1]))
        # Same definition of m as above (mean *absolute* activity).
        m = mean(abs(S), axis=1)
    # Try to equalize variance of mean
    return mtr(multiply(S, power((mean(m) / m), eq_power)))
def _best_match(Astar, A, S, itr): """Calculates the best matching ordering for A against Astar. If there are many dictionaries, Munkres can take a bit too long. So the matching is only done at logarithmically spaced epochs, [1,2,3,4,5,6,7,8,10,12,14,17,20,24,...] """ q = 15 if floor(q * log10(itr + 1)) != floor(q * log10(itr + 2)): C = -Astar.T * A assert all(isfinite(C)) idx = Munkres().compute(C.tolist()) newA = mtr(zeros(A.shape)) newS = zeros(S.shape) for r, c in idx: newA[:, r] = A[:, c] newS[r, :] = S[c, :] return newA, newS else: return A, S
def _best_match(Astar, A, S, itr): """Calculates the best matching ordering for A against Astar. If there are many dictionaries, Munkres can take a bit too long. So the matching is only done at logarithmically spaced epochs, [1,2,3,4,5,6,7,8,10,12,14,17,20,24,...] """ q = 15 if floor(q*log10(itr+1)) != floor(q*log10(itr+2)): C = - Astar.T * A assert all(isfinite(C)) idx = Munkres().compute(C.tolist()) newA = mtr(zeros(A.shape)) newS = zeros(S.shape) for r, c in idx: newA[:, r] = A[:, c] newS[r, :] = S[c, :] return newA, newS else: return A, S
def update_with(design, generator, A, itr):
    """Run one learning step of ``design`` on the generator's current data.

    All examples are encoded with ``A``, the design's selector picks the
    examples to learn from, and the design's updater produces the new
    dictionary from them.

    Returns:
        ``(A, all_stats, Xp)``: the dictionary returned by ``collect_stats``
        (possibly re-ordered), the collected statistics, and the first
        picked examples (at most as many as there are dictionary columns).
    """
    X = generator.X

    # Encode every training example, then let the policy choose among them.
    S = design.encoder.encode(X, A)
    idx = design.selector.select(X, A, S)

    # Learn from the chosen examples only.
    chosen = mtr(X[:, idx])
    updated = design.updater.update(chosen, A, itr)

    # Statistics; collect_stats also hands back the (re-ordered) dictionary.
    stats, A = collect_stats(generator, S, A, updated, idx, itr)
    all_stats = {}
    all_stats.update(stats)

    # Some top chosen examples.
    n_top = min(len(idx), A.shape[1])
    Xp = X[:, idx[:n_top]]
    return A, all_stats, Xp
def update(self, X, A, itr):
    """Placeholder updater: returns ``mtr(A)``; ``X`` and ``itr`` are unused."""
    # TODO write this
    result = mtr(A)
    return result
def generate_S(self):
    """Draw a sparse activation matrix of shape ``(dictionary.K, N)``.

    For each of the ``N`` columns, ``nnz`` row indices are drawn at random
    and given the value ``-log(u) / lambdaS`` with ``u`` drawn by ``rand``.
    The matrix is assembled through ``csc_matrix`` and returned dense.
    """
    n_atoms = self.dictionary.K
    n_entries = self.N * self.nnz

    # Draw order (indices first, then values) is kept for reproducibility.
    row_idx = randint(n_atoms, size=n_entries)
    col_idx = arange(self.N).repeat(self.nnz)
    values = -log(rand(n_entries)) / self.lambdaS

    sparse = csc_matrix((values, (row_idx, col_idx)), shape=(n_atoms, self.N))
    return mtr(sparse.todense())
def __init__(self, p = 5, K = 50, **kwds):
    """Create a dictionary of ``K`` generated elements of dimension ``p * p``.

    ``p`` is stored on the instance before ``self.generate_A(p * p, K)`` is
    called; the result is passed, together with ``**kwds``, to the parent
    initializer.
    """
    self.p = p
    dimension = p * p
    generated = mtr(self.generate_A(dimension, K))
    super(GeneratedDictionary, self).__init__(generated, **kwds)
def encode(self, X, A):
    """Encode ``X`` with the normalized dictionary and clip negatives to zero.

    Delegates to ``self._encode(X, _normalize(A))`` and sets every negative
    activation of the result to 0 before returning it via ``mtr``.
    """
    unit_A = _normalize(A)
    S = self._encode(X, unit_A)
    negative = S < 0
    S[negative] = 0
    return mtr(S)
def generate_S(self):
    """Draw a dense activation matrix of shape ``(dictionary.K, N)``.

    Every entry is ``-log(u) / lambdaS`` with ``u`` drawn by ``rand``.
    """
    draws = rand(self.dictionary.K, self.N)
    activations = -log(draws) / self.lambdaS
    return mtr(activations)