예제 #1
0
  def testTrieDiff(self):
    """Check that DiffTries reports the same diffs before and after Merge.

    trie1 accepts four sequences with accept1.  trie2 accepts only the first
    three of them and uses accept2 for ['0', '1', '4'].  The expected diff is
    therefore the changed accept info for ('0', '1', '4') and the sequence
    ('0', '1', '5'), which has no counterpart (None) in trie2.  The same
    result must be produced for the tries returned by NodeCache.Merge.
    """
    trie1 = trie.Node()
    trie2 = trie.Node()
    accept1 = trie.AcceptInfo(input_rr='%eax', output_rr='%edx')
    accept2 = trie.AcceptInfo(input_rr='%eax', output_rr='%ecx')

    for path in (['0', '1', '2'], ['0', '1', '3'],
                 ['0', '1', '4'], ['0', '1', '5']):
      trie.AddToUncompressedTrie(trie1, path, accept1)

    trie.AddToUncompressedTrie(trie2, ['0', '1', '2'], accept1)
    trie.AddToUncompressedTrie(trie2, ['0', '1', '3'], accept1)
    trie.AddToUncompressedTrie(trie2, ['0', '1', '4'], accept2)

    # Both tries are merged through the same cache, starting from its
    # empty node.
    node_cache = trie.NodeCache()
    compressed_trie1 = node_cache.Merge(node_cache.empty_node, trie1)
    compressed_trie2 = node_cache.Merge(node_cache.empty_node, trie2)

    diffs = set(trie.DiffTries(trie1, trie2, node_cache.empty_node, ()))
    compressed_diffs = set(
        trie.DiffTries(compressed_trie1, compressed_trie2,
                       node_cache.empty_node, ()))

    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that no longer exists in recent unittest versions.
    self.assertEqual(
        diffs,
        set([(('0', '1', '4'), accept1, accept2),
             (('0', '1', '5'), accept1, None)]))
    self.assertEqual(diffs, compressed_diffs)
예제 #2
0
 def _build_trie(self):
     """Create a new root node and insert every transaction into it.

     Each transaction in ``self.T`` is ordered by descending
     ``self.sort_order`` value, reduced to the items found in
     ``self.frequent_items``, and inserted together with the multiplicity
     paired with it in ``self.multiplicities``.
     """
     def rank(item):
         return self.sort_order[item]

     self.root = trie.Node()
     for transaction, multiplicity in zip(self.T, self.multiplicities):
         ordered = sorted(transaction, key=rank, reverse=True)
         kept = [item for item in ordered if item in self.frequent_items]
         self.root.insert(kept, multiplicity)
예제 #3
0
 def MakeUncompressedTrie(self):
   """Return a new trie node holding five fixed sequences.

   Every sequence is added through trie.AddToUncompressedTrie with the same
   AcceptInfo (input_rr='%eax', output_rr='%edx').
   """
   root = trie.Node()
   accept_info = trie.AcceptInfo(input_rr='%eax', output_rr='%edx')
   paths = (
       ['0', '1', '2'],
       ['0', '1', '2', '3'],
       ['0', '1', '3'],
       ['0', '1', '4'],
       ['0', '1', '5'],
   )
   for path in paths:
     trie.AddToUncompressedTrie(root, path, accept_info)
   return root
예제 #4
0
 def autocomplete_load_node(self):
     """Save the current autocomplete node and switch to the selected language.

     The node in ``self.node`` is written to the pickle file of the language
     in ``self.actual_lang``.  ``self.actual_lang`` is then set to the
     lower-cased text of ``self.languages_in``, and ``self.node`` is loaded
     from that language's pickle file.  If no such file exists, a new
     ``trie.Node`` is created, written to the file and used instead.

     Every file is opened in a ``with`` block so the handle is flushed and
     closed as soon as the dump or load finishes.
     """
     selected_lang = self.languages_in.currentText().lower()

     # Persist the node of the language being left before switching.
     with open(f"autocomplete/{self.actual_lang}.pickle", "wb") as out_file:
         dill.dump(self.node, out_file)

     self.actual_lang = selected_lang
     if os.path.isfile(f"./autocomplete/{self.actual_lang}.pickle"):
         # NOTE(review): dill.load can execute arbitrary code stored in the
         # file; this is only safe while these pickles are trusted.
         with open(f"autocomplete/{self.actual_lang}.pickle", "rb") as in_file:
             self.node = dill.load(in_file)
     else:
         node = trie.Node()
         with open(f"autocomplete/{self.actual_lang}.pickle", "wb") as out_file:
             dill.dump(node, out_file)
         self.node = node
예제 #5
0
def build_trie(state_feat_dic):
    """
    Build a trie from a state-features dictionary.

    Keys are inserted in sorted order, each with its dictionary value, and
    an info message is logged after every millionth insertion is reached.

    :param  state_feat_dic:  state-features dictionary
    :return:                 root node of trie
    """
    root = trie.Node()
    inserted = 0
    for state_feat in sorted(state_feat_dic.keys()):
        inserted += 1
        if inserted % 1000000 == 0:
            logging.info('%dm-th trie node inserting..', inserted / 1000000)
        root.insert(state_feat, state_feat_dic[state_feat])
    return root
예제 #6
0
def main(fin, output_stem):
    """
    Make the syllable-morpheme TRIE dictionary.

    Each non-empty input line is split at its first tab into a syllable key
    and its morpheme results.  Results are grouped per syllable, joined with
    ``_ANAL_RESULT_DELIM`` and inserted into a trie, which is then written
    out breadth-first to three files:

    * ``<output_stem>.trie``     - ``node.pack(val_idx)`` for every node,
      where ``val_idx`` is the serial number of the node's value or -1 when
      the node has no value
    * ``<output_stem>.val``      - each value encoded as UTF-32LE with a
      terminating zero
    * ``<output_stem>.val.len``  - length of each value in 4-byte units,
      terminator included, packed with struct format ``'h'``

    :param  fin:          input file
    :param  output_stem:  output file name without extension
    :raises RuntimeError: if a morpheme result already contains one of the
                          delimiters
    """
    syll_morph_dic = defaultdict(set)
    for line_num, line in enumerate(fin, start=1):
        if line_num % 1000000 == 0:
            logging.info('%dm-th line', (line_num / 1000000))
        line = line.strip()
        if not line:
            continue
        # `unicode` is the Python 2 builtin: lines are decoded from UTF-8.
        syllable, morph = unicode(line, 'UTF-8').split(u'\t', 1)
        if _ANAL_RESULT_DELIM in morph or _MORPH_DELIM in morph:
            raise RuntimeError('Delimiter in morpheme results')
        morph = morph.replace(u'\t', _ANAL_RESULT_DELIM).replace(
            u' + ', _MORPH_DELIM)
        syll_morph_dic[syllable].add(morph)

    trie_root = trie.Node()
    for syllable in sorted(syll_morph_dic):
        morphs = sorted(syll_morph_dic[syllable])
        trie_root.insert(syllable, _ANAL_RESULT_DELIM.join(morphs))

    val_serial = 0
    nodes = trie_root.breadth_first_traverse()
    # All three outputs receive raw bytes, so all are opened in binary mode,
    # and the with-block guarantees they are flushed and closed.
    with open('%s.trie' % output_stem, 'wb') as fout_key, \
            open('%s.val' % output_stem, 'wb') as fout_val, \
            open('%s.val.len' % output_stem, 'wb') as fout_val_idx:
        for idx, node in enumerate(nodes):
            logging.debug(u'%d:%s', idx, node)
            val_idx = -1
            if node.value:
                val_idx = val_serial
                val_serial += 1
                uni_val = (node.value + u'\0').encode('UTF-32LE')
                fout_val.write(uni_val)
                # Integer division: struct needs an int; the length includes
                # the terminating zero value.
                fout_val_idx.write(struct.pack('h', len(uni_val) // 4))
            fout_key.write(node.pack(val_idx))
    logging.info('Number of nodes: %d', len(nodes))
    logging.info('Number of values: %d', val_serial)
예제 #7
0
# NOTE(review): plain assignment binds a second name to the same object, so
# the in-place transliteration below is also visible through contents_temp
# and titles_temp -- confirm whether an independent copy was intended.
contents_temp = contents

titles_temp = titles

# Replace every entry of contents[i] and titles[i] in place with the result
# of unidecode.unidecode() for the first NN documents.
for i in range(NN):
    for j in range(len(contents[i])):
        contents[i][j] = unidecode.unidecode(contents[i][j])
    for j in range(len(titles[i])):
        titles[i][j] = unidecode.unidecode(titles[i][j])
        
# getReference maps get_docID[i] to that document's trie node;
# documentRoot holds the same nodes by position i.
getReference = {}
documentRoot = []
collection = trie.CollectionNode()

# Create one trie.Node per document and register it in both containers.
for i in range(NN):
    newDocument = trie.Node()
    documentRoot.append(newDocument)
    getReference[get_docID[i]] = newDocument
# Filled later, keyed by get_docID[i].
max_tf = {}


start = time.time()
for i in tqdm(range(NN)):
    for w in contents_temp[i]:
        collection.add_document(w, 0, get_docID[i])
        documentRoot[i].add(w, 0)
        if get_docID[i] in max_tf:
            max_tf[get_docID[i]] = max(documentRoot[i].count_words(w, 0), max_tf[get_docID[i]])
        else:
            max_tf[get_docID[i]] = documentRoot[i].count_words(w, 0)
    for w in titles_temp[i]:
예제 #8
0
import trie
import csv
# Rows read from the CSV; the index stored in the tries refers to this list.
word_list = []

# One trie per indexed name form.
full_name_root = trie.Node()
middle_name_root = trie.Node()
last_name_root = trie.Node()

with open('../data/test_data_sample.csv', 'r') as csvFile:
    reader = csv.reader(csvFile)
    counter = 0
    for w in reader:
        # csv.reader yields an empty list for a blank line.  Skip such rows
        # before they are stored, so w[0] cannot raise IndexError and
        # `counter` stays equal to the row's position in word_list.
        if not w:
            continue
        word_list.append(w)
        # The first column gets no trie of its own; it only contributes to
        # the concatenated full name.
        full_name = w[0].lower()
        if len(w) > 1:
            middle_name_root.add_word(w[1].lower(), index_in_list=counter)
            full_name += w[1].lower()
        if len(w) > 2:
            last_name_root.add_word(w[2].lower(), index_in_list=counter)
            full_name += w[2].lower()
        full_name_root.add_word(full_name, index_in_list=counter)
        counter += 1


def getName(index):
    name = ""
    l = len(word_list[index])
    for i in range(0, l):
예제 #9
0
 def __init__(self, validator):
     """Store ``validator`` and initialise counters and trie state.

     Both counters start at zero; ``sub_trie`` is a new ``trie.Node`` and
     ``node_cache`` a new ``trie.NodeCache``.
     """
     self.total_instructions = self.num_valid = 0
     self.validator = validator
     self.sub_trie, self.node_cache = trie.Node(), trie.NodeCache()
예제 #10
0
import trie
import sqlite3

# Rows fetched from the database, in query order.
word_list = []

full_name_root = trie.Node()

# Open exactly one connection; a connection whose handle is not kept can
# never be closed explicitly.
con = sqlite3.connect('../data/abc.db')  # database file input
cur = con.cursor()

query = "select username from user order by username"

# Fetch all usernames at once; each row is a one-element tuple.
cur.execute(query)
result = cur.fetchall()

counter = 0
for w in result:
    # word_list.append(r[0])
    full_name = ""
    word_list.append(w)
    # print("Added : " + w[0] + "Index in list : " + str(counter))
    #first_name_root.add_word(w[0].lower(),index_in_list=counter)
    full_name += w[0].lower()
    # if len(w) > 1:
    #     middle_name_root.add_word(w[1].lower(),index_in_list=counter)
    #     full_name += w[1].lower()
    # if len(w) > 2:
    #     last_name_root.add_word(w[2].lower(),index_in_list=counter)
    #     full_name += w[2].lower()
    full_name_root.add_word(full_name, index_in_list=counter)