Esempio n. 1
0
def computeCorr(X, Y, options):
    """Compute regularized, optionally sample-weighted, covariance blocks.

    Args:
        X: array-like of shape (N, Dx), one sample per row.
        Y: array-like of shape (N, Dy), row-aligned with X.
        options: object providing
            tau: shrinkage coefficient; Cxx and Cyy are returned as
                (1 - tau) * C + tau * I.
            covar_type: 'outer' selects the uncentered weighted second
                moments X'WX / N; any other value runs np.cov on the rows
                scaled by sqrt(weight).
            cca_weights: None for uniform weights, otherwise N per-sample
                weights (presumably a NumPy array, since it is multiplied
                by a scalar elementwise).  They are rescaled to sum to N
                and must all be positive after rescaling.

    Returns:
        Tuple (Cxx, Cxy, Cyy) of np.matrix objects with shapes
        (Dx, Dx), (Dx, Dy) and (Dy, Dy).

    Raises:
        AssertionError: if X and Y differ in row count, or the rescaled
            weights are not all positive or not of length N.
    """
    # np.asmatrix is what np.mat aliased; the alias is gone in NumPy 2.0.
    X = np.asmatrix(X)
    Y = np.asmatrix(Y)
    N, Dx = X.shape
    NY, Dy = Y.shape
    assert N == NY

    tau = options.tau

    ## process weights
    if options.cca_weights is None:
        weights = np.ones(N)
    else:
        print('CCA Weights: %s' % (options.cca_weights,))
        # Float sum forces true division even for integer weights on
        # Python 2, where int / int would floor small weights to zero.
        sum_weights = np.sum(options.cca_weights, dtype=float)
        weights = common.asVector(N * options.cca_weights / sum_weights)
        print('weights: %s' % (weights,))
        assert np.all(weights > 0)
        assert len(weights) == N

    # A weight column broadcast across the feature columns scales each
    # sample row; this replaces multiplying by a dense N x N diagonal matrix.
    w_col = np.asarray(weights).reshape(-1, 1)
    X_rows = np.asarray(X)
    Y_rows = np.asarray(Y)

    if options.covar_type == 'outer':
        Xw = np.asmatrix(X_rows * w_col)
        Yw = np.asmatrix(Y_rows * w_col)
        Cxx = Xw.T * X / N
        Cxy = Xw.T * Y / N
        Cyy = Yw.T * Y / N
    else:
        root_w = np.sqrt(w_col)
        # Place the scaled X and Y side by side: one row per sample,
        # Dx + Dy columns.  np.cov expects variables in rows, hence Z.T.
        Z = np.hstack([X_rows * root_w, Y_rows * root_w])
        C = np.cov(Z.T)
        Cxx = C[:Dx, :Dx]
        Cxy = C[:Dx, Dx:]
        Cyy = C[Dx:, Dx:]

    # Shrink the auto-covariances toward the identity; Cxy is left as is.
    Cxx = np.asmatrix((1 - tau) * Cxx + tau * np.eye(Dx))
    Cxy = np.asmatrix(Cxy)
    Cyy = np.asmatrix((1 - tau) * Cyy + tau * np.eye(Dy))
    return Cxx, Cxy, Cyy
def output_edges(M, L, reverseLookup):
    L = np.array(L)
    print >> sys.stderr, 'class:', L.__class__
    # output top K values.
    Nw, Nf = M.M.shape
    for iw in xrange(Nw):
        word = M.reverseStrings[iw]  # get words
        vw = L[iw, :]
        J = common.asVector(np.argsort(vw))
        count = 0
        if iw % 100 == 0:
            print >> sys.stderr, "word i", iw, '=', word

        for jf in reversed(J):
            feature = reverseLookup[jf]
            if feature in M.strings and L[iw, jf] > 0:
                print "%s,%s,%f" % (word, feature, L[iw, jf])
                #count += 1
            if count == options.K:
                break
Esempio n. 3
0
def hist(v, bins=10):
    """Draw a histogram of `v` with `bins` bins and display it.

    `v` is first passed through common.asVector; the plot is produced
    with plt.hist and shown with show().  Nothing is returned.
    """
    values = common.asVector(v)
    plt.hist(values, bins=bins)
    show()