def encode_test():
    """Smoke-test ``encode`` on a single node and print the outcome.

    Builds the graph with ``wordnet_graph()``, obtains a language from
    ``singleresolution(100000)``, encodes the node ``('levy', 'v')`` against
    them, then prints a header line and hands the codes to ``print_nodes``.
    Returns nothing; the only observable effect is the printed output.
    """
    graph = wordnet_graph()
    language = singleresolution(100000)
    # The node is a (word, pos) pair -- presumably the verb sense of 'levy'.
    node = ('levy', 'v')
    codes = encode(graph, node, language)
    # Parenthesised single-argument print emits the same text on Python 2
    # and is also valid syntax on Python 3.
    print('encoded results')
    print_nodes(graph, codes)
'''
language is a list of words (or wordnet nodes).
wordnet nodes consist of word, (word, pos), and synsets.
'''

from math import *
from graphparse import wordnet_graph
from util import *

g = wordnet_graph()


def multiresolution(n_layer = 10, dim = 1000, factor = 1.6):
    """Cut the nodes returned by ``top_k`` into ``n_layer`` consecutive bands.

    Each band is one 'language': the set of words that falls inside a
    certain bandwidth of the node list.  Band 0 spans indices
    ``[0, floor(dim))``; band ``i > 0`` spans
    ``[floor(dim * factor**(i-1)), floor(dim * factor**i))``.

    Parameters:
        n_layer: number of bands to produce.
        dim: upper index bound of the first band.
        factor: geometric growth ratio between successive band boundaries.

    Returns:
        A list with ``n_layer`` slices of the ``top_k`` result, in order.

    Raises:
        AssertionError: if the number of nodes needed exceeds
            ``g.number_of_nodes()``.
    """
    # Total number of nodes needed to fill the outermost band.
    vocab_size = int(ceil(dim * pow(factor, n_layer - 1)))
    assert vocab_size <= g.number_of_nodes()

    # presumably the vocab_size highest-ranked nodes of g -- confirm in util.top_k
    ranked = top_k(vocab_size, g)

    # Band boundaries: a leading 0 followed by floor(dim * factor**k) for each
    # layer k, so neighbouring entries delimit exactly one band.
    bounds = [0]
    bounds.extend(int(floor(dim * pow(factor, k))) for k in range(n_layer))

    return [ranked[lo:hi] for lo, hi in zip(bounds, bounds[1:])]