def make(N, Nseed, D):
    """Build a synthetic pair of word collections from one random latent matrix.

    Both collections receive the same mock vocabulary (the decimal strings
    "0" .. str(N + Nseed - 1)) and the same mock frequencies
    (0 .. N + Nseed - 1).  Each collection's features come from its own
    ``random_proj_shift`` call on the shared latent matrix, and finally the
    leading words of ``Y`` are reordered with ``Y.permuteFirstWords(pi)``.

    Args:
        N: Length handed to ``perm.ID`` when building the permutation
            (presumably the number of words that get shuffled -- confirm
            against ``perm.ID``).
        Nseed: Number of additional entries on top of ``N``; the vocabulary
            and the latent matrix have ``N + Nseed`` rows.
        D: Second dimension of the latent matrix requested from ``randn``.

    Returns:
        A tuple ``(X, Y, pi)``: the two ``words.Words`` objects and the
        permutation that was passed to ``Y.permuteFirstWords``.
    """
    total = N + Nseed

    # Shared latent data; drawn first so the order of random draws is fixed.
    # NOTE(review): randn is called with a shape tuple, so it must be the
    # helper this file has in scope -- confirm it accepts a tuple.
    Z = randn((total, D))

    X = words.Words()
    Y = words.Words()

    # Mock vocabulary and frequencies.  range/np.arange are used instead of
    # xrange so the function runs unchanged on Python 2 and Python 3.  Each
    # side gets its own array objects (nothing is shared between X and Y).
    X.words = np.array([str(i) for i in range(total)])
    Y.words = np.array([str(i) for i in range(total)])
    X.freq = np.arange(total)
    Y.freq = np.arange(total)

    # Two separate calls on the same latent matrix, X first and then Y.
    # NOTE(review): presumably each call applies an independent random
    # projection/shift -- verify against random_proj_shift.
    X.features = random_proj_shift(Z)
    Y.features = random_proj_shift(Z)

    # Permute Y randomly.  Everything before `cut` is shuffled and whatever
    # lies at or beyond it is appended untouched.  With cut == N + Nseed the
    # untouched tail is empty whenever perm.ID(N) has at most that many
    # entries; lower `cut` to keep a suffix of the permutation in place.
    pi = perm.ID(N)
    cut = total
    pi = np.append(perm.randperm(pi[:cut]), pi[cut:])
    Y.permuteFirstWords(pi)
    return X, Y, pi
# Example #2
# 0
#                 G[i][j] = self.K[word_i][word_j]
#
#         return np.mat(G)


if __name__ == '__main__':  # test
    # Manual smoke test: build a dict-of-dicts with N rows, where each row
    # holds random values under D // 4 keys taken from a random permutation
    # of 0 .. D-1.  The kernel benchmark that consumed it is disabled below.
    np.random.seed(1)  # fixed seed for numpy's global random generator
    N = 400
    D = 800

    DD = dict()
    # Materialise as lists: on Python 3 range() is lazy, while the original
    # Python 2 code handed perm.randperm a real list.
    rangeD = list(range(D))
    rangeN = list(range(N))
    for i in rangeN:
        DD[i] = dict()
        S = perm.randperm(rangeD)
        # Floor division keeps the slice bound an int on Python 3 as well
        # (D / 4 would be a float there and slicing would raise TypeError).
        S = S[:D // 4]
        for j in S:
            # randn((1, 1)) is indexed at [0, 0] to pull out a single value.
            DD[i][j] = randn((1, 1))[0, 0]
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print("finished constructing.")
    t = time.time()  # start time for the (currently disabled) benchmark
    # Disabled benchmark, kept for reference:
    # K = DictDictKernel(DD)
    # #import cProfile
    # #cProfile.runctx("K.compute(R, R)", globals(), locals())
    # K.compute(rangeN, rangeN)
    # G = K.materialize(rangeN, rangeN)
    # print('elapsed', time.time() - t)
    # print(G)