parser.add_option('--KNN', dest='KNN', type="int", action='store', default=10) parser.add_option('--normalize', dest='normalize', type="int", action='store', default=1) (options, args) = parser.parse_args() return options if __name__ == '__main__': # parse arguments filename_wordsX = (sys.argv[1]) # read input wordsX = IO.readPickledWords(filename_wordsX) options = parseOptions() # make graph G = makeGraph(wordsX, options) G = G.todense() if options.normalize == 1: G = toSymmetricStochastic(G, sym=(options.sym == 1), stochastic=(options.stochastic == 1), norm='l1') elif options.normalize == 2: G = toSymmetricStochastic(G, sym=(options.sym == 1), stochastic=(options.stochastic == 1), norm='l2') msk = MSK(None, wordsX.words, wordsX.words) # save the matrix. # This is hacky, since we're trusting that G is generated with rows/columns that match the order of wordsX.words msk.M = G graphFilename = filename_wordsX.replace(".", "_WG.") if options.KNN > 0: graphFilename = graphFilename.replace(".", "_KNN"+str(options.KNN)+".") IO.pickle(graphFilename, msk)
hammings = np.zeros(R) for (i, noise) in enumerate(noise_levels): for r in xrange(R): np.random.seed(r) # read files wordsX, wordsY, seedsX, seedsY = mcca.readInput(options, filename_wordsX, filename_wordsY, filename_seedX, filename_seedY) seed_length = len(seedsX.words) pi = perm.randperm(xrange(len(wordsY.words))) wordsY.permuteFirstWords(pi) if options.K > 0: GX = words_to_graph.makeGraph(wordsX, seedsX, options.graph_type, options.K) GY = words_to_graph.makeGraph(wordsY, seedsY, options.graph_type, options.K) GY = graphs.permute(GY, pi) GX = graphs.toSymmetricStochastic(GX) GY = graphs.toSymmetricStochastic(GY) print 'Graph norms', norm(GX), norm(GY) else: GX = None GY = None print 'no graphs provided' # add random noise to the features if noise > 0: if not options.pickled: wordsX.features += noise * randn(wordsX.features.shape) wordsY.features += noise * randn(wordsY.features.shape) seedsX.features += noise * randn(seedsX.features.shape) seedsY.features += noise * randn(seedsY.features.shape) else: wordsX.addReprNoise(noise)