# Imports needed by the functions in this excerpt.
import sys

import numpy as np
import matplotlib.pyplot as plt

import common


def computeCorr(X, Y, options):
    """Compute (optionally weighted, regularized) covariance blocks for CCA."""
    X = np.mat(X)
    Y = np.mat(Y)
    (NX, Dx) = X.shape
    (NY, Dy) = Y.shape
    assert NX == NY
    N = NX
    tau = options.tau
    covar_type = options.covar_type

    ## process weights
    if options.cca_weights is None:
        weights = common.asVector(np.ones((N, 1)))
    else:
        print 'CCA Weights:', options.cca_weights
        sum_weights = np.sum(options.cca_weights)
        # normalize so the per-sample weights sum to N
        weights = common.asVector(N * options.cca_weights / sum_weights)
    weights = common.asVector(weights)
    print 'weights:', weights
    assert np.all(weights > 0)
    assert len(weights) == N

    if covar_type == 'outer':
        # weighted outer-product covariances
        W = np.diag(weights)
        Cxx = X.T * W * X / N  # C[:Dx, :Dx]
        Cxy = X.T * W * Y / N  # C[:Dx, Dx:]
        Cyy = Y.T * W * Y / N  # C[Dx:, Dx:]
    else:
        # weighted sample covariance of the stacked data
        W = np.diag(np.sqrt(weights))
        Z = np.c_[W * X, W * Y]  # concatenate X and Y side by side (by columns)
        C = np.cov(Z.T)
        Cxx = C[:Dx, :Dx]
        Cxy = C[:Dx, Dx:]
        Cyy = C[Dx:, Dx:]

    # shrink the within-set covariances toward the identity (regularization by tau)
    Cxx = np.mat((1 - tau) * Cxx + tau * np.eye(Dx))
    Cxy = np.mat(Cxy)
    Cyy = np.mat((1 - tau) * Cyy + tau * np.eye(Dy))
    return Cxx, Cxy, Cyy
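# Usage sketch (illustrative only, not part of the original module): shows one
# way computeCorr might be called. The Opts class and the random data below
# are assumptions made for demonstration.
def _example_computeCorr():
    class Opts(object):
        tau = 0.1              # shrinkage toward the identity
        covar_type = 'outer'   # use the weighted outer-product branch
        cca_weights = None     # uniform per-sample weights
    X = np.random.randn(100, 5)
    Y = np.random.randn(100, 3)
    Cxx, Cxy, Cyy = computeCorr(X, Y, Opts())
    print Cxx.shape, Cxy.shape, Cyy.shape  # (5, 5) (5, 3) (3, 3)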
def output_edges(M, L, reverseLookup):
    """Print the top-K (word, feature, weight) edges for each word as CSV."""
    L = np.array(L)
    print >> sys.stderr, 'class:', L.__class__
    # output top K values.
    Nw, Nf = M.M.shape
    for iw in xrange(Nw):
        word = M.reverseStrings[iw]  # get word for this row
        vw = L[iw, :]
        J = common.asVector(np.argsort(vw))  # feature indices sorted by weight (ascending)
        count = 0
        if iw % 100 == 0:
            print >> sys.stderr, "word i", iw, '=', word
        for jf in reversed(J):  # iterate from largest weight to smallest
            feature = reverseLookup[jf]
            if feature in M.strings and L[iw, jf] > 0:
                print "%s,%s,%f" % (word, feature, L[iw, jf])
                count += 1
            if count == options.K:  # `options` is a module-level options object set elsewhere
                break
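# Usage sketch (illustrative only): output_edges expects a model-like object
# with an Nw x Nf matrix M.M, a reverseStrings index->word mapping and a
# strings collection of known features, plus a module-level `options` object
# with a K attribute. The mock objects below are hypothetical and only meant
# to illustrate that interface.
def _example_output_edges():
    global options               # the real module sets this elsewhere (e.g. from the command line)
    class Opts(object):
        K = 2                    # keep at most 2 edges per word
    options = Opts()

    class MockModel(object):
        def __init__(self):
            self.M = np.zeros((2, 3))                        # only the shape is used here
            self.reverseStrings = {0: 'cat', 1: 'dog'}       # word index -> word
            self.strings = set(['furry', 'barks', 'meows'])  # known feature strings

    reverseLookup = {0: 'furry', 1: 'barks', 2: 'meows'}     # feature index -> feature
    L = np.array([[0.9, 0.0, 0.7],
                  [0.2, 0.8, 0.0]])                          # word-feature edge weights
    output_edges(MockModel(), L, reverseLookup)              # prints CSV lines like "cat,furry,0.900000"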
def hist(v, bins=10):
    """Plot a histogram of the values in v."""
    v = common.asVector(v)
    H = plt.hist(v, bins=bins)
    plt.show()
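# Usage sketch (illustrative only): plots a histogram of made-up random data
# using the helper above.
def _example_hist():
    v = np.random.randn(1000)
    hist(v, bins=30)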