def readInput(options, filename_wordsX, filename_wordsY, filename_seed): # load data files if options.pickled: wordsX = IO.readPickledWords(filename_wordsX) wordsY = IO.readPickledWords(filename_wordsY) else: wordsX = IO.readWords(filename_wordsX) wordsY = IO.readWords(filename_wordsY) if options.filename_graphX is not None: print "loading graph -", options.filename_graphX wordsX.G = IO.unpickle(options.filename_graphX) print "loading graph -", options.filename_graphY wordsY.G = IO.unpickle(options.filename_graphY) seed_list = Struct() seed_list.X, seed_list.Y = IO.readSeed(filename_seed) # read the seed list (X,Y) wordsX.pushSeedToEnd(seed_list.X) wordsY.pushSeedToEnd(seed_list.Y) # assert sizes are correct Nx = len(wordsX.words) Ny = len(wordsY.words) if Nx != Ny: log(0, "Number of words must be the same", Nx, Ny) else: log(0, Nx, "words loaded.") NSx = len(seed_list.X) NSy = len(seed_list.Y) if NSx != NSy: log(0, "Number of seed words must be the same", NSx, NSy) else: log(0, NSx, "seed words loaded.") assert NSx == NSy if options.filename_graphX is not None: (NGx0, NGx1) = wordsX.G.shape() (NGy0, NGy1) = wordsY.G.shape() assert NGx0 == NGx1, "GX is not a square adjacency matrix" assert NGy0 == NGy1, "GY is not a square adjacency matrix" # permute Y if rand_seed > 1, (this should only be used when testing on mock data) # wordsY.permuteFirstWords(perm.randperm(perm.ID(Ny))) # MU.printMatching(wordsX.words, wordsY.words, perm.ID(Ny)) return wordsX, wordsY, seed_list
def cacheOrComputeKernel(self, options, filename, f):
    """Return a kernel, loading it from `filename` when caching allows it.

    The cached copy is used only when options.useCache equals 1 and
    `filename` exists on disk.  In every other case the kernel is produced
    by calling f(self) and is then written to `filename` with IO.pickle, so
    the file is (re)written even when caching is switched off.

    A one-line progress message is printed to stderr on either path.

    Args:
        self: object passed through unchanged as the sole argument of `f`.
        options: object exposing a `useCache` attribute.
        filename: path of the cache file to load from or save to.
        f: callable taking `self` and returning the kernel.

    Returns:
        The unpickled kernel on a cache hit, otherwise the value of f(self).
    """
    # `and` short-circuits: the filesystem is only probed when caching is on.
    use_cached_copy = options.useCache == 1 and os.path.exists(filename)

    if not use_cached_copy:
        kernel = f(self)
        print >> sys.stderr, 'Saving kernel to file:', filename
        IO.pickle(filename, kernel)
        return kernel

    print >> sys.stderr, 'Loading kernel from file:', filename
    return IO.unpickle(filename)