コード例 #1
0
ファイル: mcca.py プロジェクト: vaswani/LEXICON_INDUCTION
def readInput(options, filename_wordsX, filename_wordsY, filename_seed):
    # load data files
    if options.pickled:
        wordsX = IO.readPickledWords(filename_wordsX)
        wordsY = IO.readPickledWords(filename_wordsY)
    else:
        wordsX = IO.readWords(filename_wordsX)
        wordsY = IO.readWords(filename_wordsY)

    if options.filename_graphX is not None:
        print "loading graph -", options.filename_graphX
        wordsX.G = IO.unpickle(options.filename_graphX)
        print "loading graph -", options.filename_graphY
        wordsY.G = IO.unpickle(options.filename_graphY)

    seed_list = Struct()
    seed_list.X, seed_list.Y = IO.readSeed(filename_seed)  # read the seed list (X,Y)
    wordsX.pushSeedToEnd(seed_list.X)
    wordsY.pushSeedToEnd(seed_list.Y)

    # assert sizes are correct
    Nx = len(wordsX.words)
    Ny = len(wordsY.words)
    if Nx != Ny:
        log(0, "Number of words must be the same", Nx, Ny)
    else:
        log(0, Nx, "words loaded.")

    NSx = len(seed_list.X)
    NSy = len(seed_list.Y)

    if NSx != NSy:
        log(0, "Number of seed words must be the same", NSx, NSy)
    else:
        log(0, NSx, "seed words loaded.")
    assert NSx == NSy

    if options.filename_graphX is not None:
        (NGx0, NGx1) = wordsX.G.shape()
        (NGy0, NGy1) = wordsY.G.shape()
        assert NGx0 == NGx1, "GX is not a square adjacency matrix"
        assert NGy0 == NGy1, "GY is not a square adjacency matrix"

    # permute Y if rand_seed > 1, (this should only be used when testing on mock data)
    # wordsY.permuteFirstWords(perm.randperm(perm.ID(Ny)))
    # MU.printMatching(wordsX.words, wordsY.words, perm.ID(Ny))
    return wordsX, wordsY, seed_list
コード例 #2
0
ファイル: words.py プロジェクト: vaswani/LEXICON_INDUCTION
 def cacheOrComputeKernel(self, options, filename, f):
     """Return a kernel, loading it from `filename` or computing it via `f`.

     The stored copy is used only when ``options.useCache == 1`` and
     `filename` exists on disk. Otherwise ``f(self)`` is called and its
     result is written to `filename` with ``IO.pickle`` before being
     returned, so the file is (re)written on every miss.

     Args:
         options: settings object; only ``useCache`` is read here.
         filename: path of the pickled kernel file.
         f: callable invoked as ``f(self)`` to build the kernel.

     Returns:
         Whatever ``IO.unpickle(filename)`` or ``f(self)`` returns.
     """
     cache_hit = options.useCache == 1 and os.path.exists(filename)
     if not cache_hit:
         # Miss (or caching disabled): compute, then persist for next time.
         kernel = f(self)
         print >> sys.stderr, 'Saving kernel to file:', filename
         IO.pickle(filename, kernel)
         return kernel

     print >> sys.stderr, 'Loading kernel from file:', filename
     return IO.unpickle(filename)