Ejemplo n.º 1
0
 def test_add(self):
     """Check the nested-dict layout ``Trie.add`` produces in ``t.trie``.

     Each character maps to a child dict and a stored word is terminated
     by a ``'$': None`` entry.
     """
     t = Trie()
     t.add('hello')
     # A single word yields one chain of nodes ending in the '$' marker.
     self.assertEqual(
         t.trie,
         {'h': {'e': {'l': {'l': {'o': {'$': None}}}}}})
     # 'hell' is a prefix of 'hello': the marker is added next to the
     # existing 'o' branch instead of creating a new chain.
     t.add('hell')
     self.assertEqual(
         t.trie,
         {'h': {'e': {'l': {'l': {'o': {'$': None}, '$': None}}}}})
Ejemplo n.º 2
0
class Bigram():
    """Split a string into segments using bigram scores and a trie.

    Candidate segments come from ``self.trie`` (built from the keys of
    ``pinyin.json``); pairs of adjacent segments are scored with
    ``self.bidic`` (loaded from ``bidic.json``) and the best-scoring chain
    is found by a backward dynamic-programming pass.
    """
    def __init__(self):
        # Score used whenever a segment pair has no entry in ``self.bidic``.
        self.minfreq = -3.14e+100
        self.load()
        self.construct_Trie()

    def load(self):
        """Load ``bidic.json`` and ``pinyin.json`` from ``model_params/``.

        The directory is resolved relative to the current working directory.
        """
        root = 'model_params'
        with open(root + '/bidic.json', 'r') as f:
            self.bidic = json.load(f)
        with open(root + '/pinyin.json', 'r') as f:
            self.pinyindic = json.load(f)

    def construct_Trie(self):
        """Build ``self.trie`` containing every key of ``self.pinyindic``."""
        self.trie = Trie()
        for key in self.pinyindic.keys():
            self.trie.add(key)

    def construct_DAG(self, seq):
        """Fill ``self.DAG`` for *seq*, which already carries its '^'/'$' padding.

        ``self.DAG[i]`` is whatever ``Trie.scan`` returns for the text that
        starts at position ``i`` (the trailing marker excluded).
        NOTE(review): ``dp_search`` uses these values as inclusive end
        indices of segments starting at ``i`` -- confirm against ``Trie.scan``.
        """
        # {start index: list of candidates}
        self.DAG = {}
        for i in range(1, len(seq) - 1):
            self.DAG[i] = self.trie.scan(seq[i:-1], i)
        # The begin and end markers are single-character segments of their own.
        self.DAG[len(seq) - 1] = [len(seq) - 1]
        self.DAG[0] = [0]

    def dp_search(self, seq):
        """Return the best-scoring segmentation of *seq* as a list of substrings.

        *seq* is wrapped in '^' and '$' markers, the candidate DAG is built,
        and scores are accumulated from the end of the string backwards.
        The score of a chain is the sum of the ``self.bidic`` entries of its
        adjacent segment pairs (``self.minfreq`` when a pair is missing);
        presumably these are log-probabilities -- TODO confirm.
        """
        seq = '^' + seq + '$'
        self.construct_DAG(seq)
        # Best score table.
        viterbi = {}
        for i in range(len(seq)):
            viterbi[i] = {}
        # { start: { end1: (score, end of next segment), end2: (...) } }
        viterbi[len(seq) - 1][len(seq) - 1] = (0., len(seq))
        # Backward dynamic programming.
        for i in range(len(seq) - 2, -1, -1):
            # For every segment seq[i:x+1] starting at i, pick the best follower.
            for x in self.DAG[i]:
                # score(seq[x+1:y+1] following seq[i:x+1]) + viterbi[x+1][y][0]
                prob_index = max(
                    (self.bidic.get(seq[x + 1:y + 1], {}).get(seq[i:x + 1], self.minfreq) +
                     viterbi.get(x + 1)[y][0], y) for y in self.DAG[x + 1])
                viterbi[i][x] = prob_index

        # Choose the first real segment, i.e. the best follower of the begin
        # marker. This repeats the computation already stored in viterbi[0][0].
        end = max((self.bidic.get(seq[1:y + 1], {}).get(seq[0], self.minfreq) +
                   viterbi.get(1)[y][0], y) for y in self.DAG[1])[1]
        # Backtrack: follow the stored "end of next segment" pointers.
        start = 1
        segs = []
        while start < len(seq) - 1:
            segs.append(seq[start:end + 1])
            temp = start
            start = end + 1
            # print(viterbi[temp][end][0])
            end = viterbi[temp][end][1]
        return segs
Ejemplo n.º 3
0
def trie_dictionary(dictionary):
    """Return a Trie holding every key of *dictionary* with its mapped value."""
    result = Trie()
    for word in dictionary:
        result.add(word, dictionary[word])
    return result
Ejemplo n.º 4
0
    def testExample(self):
        """Check the counts ``find`` returns for a shared and an absent prefix."""
        words = Trie()
        for entry in ("hack", "hackerrank"):
            words.add(entry)

        # "hac" is a prefix of both stored words; "hak" matches neither.
        for prefix, expected in (("hac", 2), ("hak", 0)):
            self.assertEqual(words.find(prefix), expected)
Ejemplo n.º 5
0
def trie_dictionary(dictionary):
    """Build a Trie from *dictionary*, storing each value under its key."""
    built = Trie()
    for entry in dictionary:
        built.add(entry, dictionary[entry])
    return built
def build_trie(words):
    """Return a trie in which every word of *words* stores its iteration index.

    A progress message is printed before and after the build.
    """
    print('building trie...')
    trie = Trie()
    for position, entry in enumerate(words):
        trie.add(entry, position)
    print('done\n')
    return trie
Ejemplo n.º 7
0
 def func():
     """Wait for the tag/alias data, then publish the command and tag tries.

     The result is stored in the module-level ``tries`` dict under the
     keys ``'cmd'`` and ``'tag'``.
     """
     global tries
     # Poll until both ``tags`` and ``all_alias`` have finished loading.
     while not (tags and all_alias):
         time.sleep(0.1)
     commands = Trie()
     labels = Trie()
     commands.add(cfg.cmds)
     commands.add(all_alias['cmd'].keys())
     labels.add(tags.keys())
     tries = {'cmd': commands, 'tag': labels}
Ejemplo n.º 8
0
def gen_suffix_trie(fname):
    """Emit a suffix trie of ``valid_syllables`` as static C arrays.

    Every syllable is inserted reversed (so lookups run from the end of the
    string), the trie is converted to a ``DATrie`` and written out through
    ``DATrie.output_static_c_arrays(fname)``.
    """
    from trie import Trie, DATrie

    reversed_trie = Trie()
    packed = DATrie()

    for syllable in valid_syllables:
        reversed_key = syllable[::-1]
        reversed_trie.add(reversed_key, valid_syllables[syllable])

    packed.construct_from_trie(reversed_trie)
    packed.output_static_c_arrays(fname)
Ejemplo n.º 9
0
def benchmark_regex_trie(LINE):
    """Benchmark ``findall`` on *LINE* with a regex generated from a Trie of KEYS.

    The matches are printed once before the timed run.
    """
    from trie import Trie
    import re

    keyword_trie = Trie()
    for keyword in KEYS:
        keyword_trie.add(keyword)
    pattern_text = keyword_trie.pattern()
    # The name ``regex`` is referenced by the benchmarked statement below.
    regex = re.compile(pattern_text)
    print(regex.findall(LINE))

    benchmark("regex.findall(LINE)", locals())
Ejemplo n.º 10
0
def gen_suffix_trie(fname):
    """Write the reversed-syllable trie to *fname* as static C arrays.

    Each key of ``valid_syllables`` is stored back-to-front together with
    its value; the resulting trie is handed to a ``DATrie`` which produces
    the output file.
    """
    from trie import Trie, DATrie

    suffix_trie = Trie()
    da_trie = DATrie()

    for syl in valid_syllables:
        suffix_trie.add(syl[::-1], valid_syllables[syl])

    da_trie.construct_from_trie(suffix_trie)
    da_trie.output_static_c_arrays(fname)
Ejemplo n.º 11
0
class ControlIt:
    """Load the words of ``content/words1.txt`` into a trie and build trees from it."""

    # Input file, relative to the current working directory.
    _WORDS_PATH = "content/words1.txt"
    # Characters removed from the text before it is split into words.
    _STRIP_CHARS = ';:*.,[]|"�<>()!°&¶/'

    def __init__(self):
        self.the_trie = Trie()

    @staticmethod
    def _read_with_fallback(path, reader):
        """Return ``reader(file)`` with *path* decoded as UTF-8, else Latin-1.

        Only a decoding failure triggers the fallback; any other error
        (for example a missing file) propagates unchanged.
        """
        try:
            with open(path, encoding='UTF-8') as f:
                return reader(f)
        except UnicodeDecodeError:
            with open(path, encoding='latin_1') as f:
                return reader(f)

    def file_grab(self):
        """Return the lower-cased word list extracted from the words file.

        Steps:
          1. Lines that start with ``word-`` followed by whitespace are
             treated as hyphenated line breaks: their first token is glued
             to the first token of the next line.
          2. The whole text is stripped of punctuation, lower-cased and
             split on whitespace.
          3. Words containing three or more hyphens are replaced by
             ``None`` (the slot is kept, so callers see ``None`` entries).
          4. The glued words are appended, and every entry that consists
             solely of a continuation token (other than "ship") is dropped.

        Returns:
            list: words (``str``) and possibly ``None`` placeholders.
        """
        loglist = self._read_with_fallback(
            self._WORDS_PATH, lambda f: f.readlines())
        text = self._read_with_fallback(self._WORDS_PATH, lambda f: f.read())

        joined = []         # hyphenated head + continuation, lower-cased
        continuations = []  # continuation tokens to filter out afterwards
        hyphen_start = re.compile(r'^\w+-\s')
        for index, line in enumerate(loglist):
            if hyphen_start.search(line) is None:
                continue
            if index + 1 >= len(loglist):
                # A hyphenated last line has no continuation to join with.
                continue
            head = line.split(' ', 1)[0]
            tail = loglist[index + 1].split(' ', 1)[0]
            if tail != "ship":
                continuations.append(tail.lower())
            joined.append((head + tail).lower())

        strip_table = str.maketrans('', '', self._STRIP_CHARS)
        lines = text.translate(strip_table).lower().split()
        for index, word in enumerate(lines):
            # The original also compared the word with a regex match object,
            # which can never be equal to a string; only this test is live.
            if word.count('-') >= 3:
                lines[index] = None
        lines.extend(joined)

        for fragment in continuations:
            # Keep entries that still have text left once the fragment is
            # removed. The fragment is matched literally so that words with
            # regex metacharacters cannot raise or over-match.
            lines = [x for x in lines if str(x).replace(fragment, "")]
        return lines

    def grab_file(self):
        """Add every grabbed word to the trie, pickle the trie and print it.

        Entries are passed through ``str()``, so ``None`` placeholders from
        ``file_grab`` are added as the string ``'None'``.
        """
        start_list = self.file_grab()
        for i in start_list:
            self.the_trie.add(self.the_trie.trie, str(i))
        mytrie = self.the_trie.get_trie()
        self.the_trie.trie_pickle(mytrie)
        print(mytrie)

    def main(self):
        """Fill the trie from the file, then build the binary and AVL trees."""
        self.grab_file()
        self.the_trie.create_binary_tree()
        self.the_trie.create_avl_tree()
Ejemplo n.º 12
0
def index_emails(path_to_directory):
    """Index every file matching ``**/*.`` below *path_to_directory*.

    The files are handed to ``index_path`` in a multiprocessing pool; each
    ``(word, path)`` pair it produces is added to the returned Trie. A tqdm
    progress bar tracks the processed files.
    """
    index = Trie()

    with multiprocessing.Pool() as pool:
        pattern = os.path.join(path_to_directory, '**/*.')
        email_paths = glob.glob(pattern, recursive=True)
        results = pool.imap(index_path, email_paths)

        for word_path_pairs in tqdm(results, total=len(email_paths)):
            for word, location in word_path_pairs:
                index.add(word, location)

    return index
Ejemplo n.º 13
0
def mkEncTrie(dct):
  """
  Makes an "encoding" trie from a description dict.
  Only call from mkTries to ensure that the reverse trie gets built as well

  A value whose second element is the NoReverse marker is stored as its
  first element only; every other value (including ones that cannot be
  indexed at all) is stored unchanged.
  """
  rv=Trie()
  for key, value in dct.iteritems():
    try:
      forward_only = value[1] is NoReverse
    except Exception:
      # Not indexable or too short: an ordinary value. Only real errors
      # are caught, so KeyboardInterrupt/SystemExit still propagate.
      forward_only = False
    if forward_only:
      rv.add(key, value[0])
    else:
      rv.add(key, value)
  return rv
Ejemplo n.º 14
0
def mkEncTrie(dct):
    """
    Makes an "encoding" trie from a description dict.
    Only call from mkTries to ensure that the reverse trie gets built as well

    Values tagged with NoReverse as their second element contribute only
    their first element; all other values are added as they are.
    """
    rv = Trie()
    for key, value in dct.iteritems():
        try:
            forward_only = value[1] is NoReverse
        except Exception:
            # The value has no second element; treat it as a plain entry.
            # Catching Exception (not everything) lets KeyboardInterrupt
            # and SystemExit through.
            forward_only = False
        rv.add(key, value[0] if forward_only else value)
    return rv
Ejemplo n.º 15
0
class ControlIt:
    """Load the words of ``content/words1.txt`` into a trie and build trees from it."""

    # Input file, relative to the current working directory.
    _WORDS_PATH = "content/words1.txt"
    # Characters removed from the text before it is split into words.
    _STRIP_CHARS = ';:*.,[]|"�<>()!°&¶/'

    def __init__(self):
        self.the_trie = Trie()

    @staticmethod
    def _read_with_fallback(path, reader):
        """Return ``reader(file)`` with *path* decoded as UTF-8, else Latin-1.

        Only a decoding failure triggers the fallback; any other error
        (for example a missing file) propagates unchanged.
        """
        try:
            with open(path, encoding='UTF-8') as f:
                return reader(f)
        except UnicodeDecodeError:
            with open(path, encoding='latin_1') as f:
                return reader(f)

    def file_grab(self):
        """Return the lower-cased word list extracted from the words file.

        Steps:
          1. Lines that start with ``word-`` followed by whitespace are
             treated as hyphenated line breaks: their first token is glued
             to the first token of the next line.
          2. The whole text is stripped of punctuation, lower-cased and
             split on whitespace.
          3. Words containing three or more hyphens are replaced by
             ``None`` (the slot is kept, so callers see ``None`` entries).
          4. The glued words are appended, and every entry that consists
             solely of a continuation token (other than "ship") is dropped.

        Returns:
            list: words (``str``) and possibly ``None`` placeholders.
        """
        loglist = self._read_with_fallback(
            self._WORDS_PATH, lambda f: f.readlines())
        text = self._read_with_fallback(self._WORDS_PATH, lambda f: f.read())

        joined = []         # hyphenated head + continuation, lower-cased
        continuations = []  # continuation tokens to filter out afterwards
        hyphen_start = re.compile(r'^\w+-\s')
        for index, line in enumerate(loglist):
            if hyphen_start.search(line) is None:
                continue
            if index + 1 >= len(loglist):
                # A hyphenated last line has no continuation to join with.
                continue
            head = line.split(' ', 1)[0]
            tail = loglist[index + 1].split(' ', 1)[0]
            if tail != "ship":
                continuations.append(tail.lower())
            joined.append((head + tail).lower())

        strip_table = str.maketrans('', '', self._STRIP_CHARS)
        lines = text.translate(strip_table).lower().split()
        for index, word in enumerate(lines):
            # The original also compared the word with a regex match object,
            # which can never be equal to a string; only this test is live.
            if word.count('-') >= 3:
                lines[index] = None
        lines.extend(joined)

        for fragment in continuations:
            # Keep entries that still have text left once the fragment is
            # removed. The fragment is matched literally so that words with
            # regex metacharacters cannot raise or over-match.
            lines = [x for x in lines if str(x).replace(fragment, "")]
        return lines

    def grab_file(self):
        """Add every grabbed word to the trie, pickle the trie and print it.

        Entries are passed through ``str()``, so ``None`` placeholders from
        ``file_grab`` are added as the string ``'None'``.
        """
        start_list = self.file_grab()
        for i in start_list:
            self.the_trie.add(self.the_trie.trie, str(i))
        mytrie = self.the_trie.get_trie()
        self.the_trie.trie_pickle(mytrie)
        print(mytrie)

    def main(self):
        """Fill the trie from the file, then build the binary and AVL trees."""
        self.grab_file()
        self.the_trie.create_binary_tree()
        self.the_trie.create_avl_tree()
Ejemplo n.º 16
0
class TestTrie(unittest.TestCase):
    """ Python tests meant to run on a saved copy of the Slack JSON data
    """
    def setUp(self):
        """ Index every member of users.json that has a real name
        """
        self.trie = Trie()
        # Default Slack JSON file name
        with open('users.json') as users:
            members = json.load(users)['members']
            for member in members:
                if 'real_name' in member:
                    lowered = member['real_name'].lower()
                    self.trie.add_name(0, lowered, member)

    def test_addition(self):
        """ Making sure trie entries are added correctly
        """
        name = 'Jane Doe'.lower()
        added = self.trie.add(name, {'value': 0})
        self.assertTrue(added)
        found = self.trie.search(name)
        self.assertIsNotNone(found)

    def test_search(self):
        """ Verifying trie entries loaded from the JSON file
        """
        missing = self.trie.search('John Doe'.lower())
        self.assertIsNone(missing)
        present = self.trie.search('Aaron Long'.lower())
        self.assertIsNotNone(present)
    def __create_documents(self, directory):
        '''
        Instantiate a Documents() for every entry of *directory* and add the
        words of each document's n-grams to a trie.

        Returns a tuple ``(documents, trie)``: a Lista with all the created
        documents, and a Trie in which every n-gram (its words joined by
        spaces, with a trailing space) is stored together with its document.
        '''
        documents = Lista()
        temp = Trie()
        for archive_name in os.listdir(directory):
            # os.path.join also works when *directory* has no trailing
            # separator, which plain concatenation silently got wrong.
            doc = Documents(os.path.join(directory, archive_name))
            documents.anexar(doc)
            for ngram in doc.ngrams:
                # Every word is followed by one space, the last one included.
                string = ''.join(word + ' ' for word in ngram.sequence)
                temp.add(string, doc)
        return documents, temp
Ejemplo n.º 18
0
class TrieTest(unittest.TestCase):
    """Structural checks on the node tree built by ``Trie.add``."""

    def setUp(self):
        """Store a small vocabulary with shared prefixes."""
        self.trie = Trie()
        for word in ('CAT', 'CAR', 'CART', 'DO', 'DOG', 'X'):
            self.trie.add(word)

    def test_root_children(self):
        """The root has exactly one child per distinct first letter."""
        top_level = self.trie.root.children

        self.assertEqual(3, len(top_level))
        for letter in ('C', 'D', 'X'):
            self.assertTrue(top_level[letter])

    def test_single_letter(self):
        """A one-letter word is a leaf flagged as a word."""
        leaf = self.trie.root.children['X']
        self.assertEqual(0, len(leaf.children))
        self.assertTrue(leaf.is_word)

    def test_prefix_is_also_word(self):
        """'DO' and 'DOG' are both words, the bare 'D' is not."""
        first = self.trie.root.children['D']
        self.assertFalse(first.is_word)

        second = first.children['O']
        self.assertTrue(second.is_word)

        third = second.children['G']
        self.assertTrue(third.is_word)

    def test_multiple_branches_are_words(self):
        """'CAT', 'CAR' and 'CART' are words below the non-word 'CA'."""
        shared = self.trie.root.children['C'].children['A']
        self.assertFalse(shared.is_word)

        self.assertTrue(shared.children['T'].is_word)

        r_branch = shared.children['R']
        self.assertTrue(r_branch.is_word)

        self.assertTrue(r_branch.children['T'].is_word)
Ejemplo n.º 19
0
class Router:
    """Route table that stores '/'-prefixed path segments in a Trie."""

    def __init__(self):
        self.trie = Trie()

    @staticmethod
    def _segments(route):
        """Split *route* on '/' and prefix every piece with '/'."""
        return ['/' + seg for seg in route.split('/')]

    def add_route(self, route, route_action):
        """Register *route_action* under the segments of *route*."""
        self.trie.add(self._segments(route), route_action)

    def show_routes(self):
        """Return the paths below the first child of the trie root as a list.

        An empty router yields an empty list.
        """
        # list() makes the values indexable on Python 3 as well, where
        # dict.values() returns a view that cannot be subscripted.
        children = list(self.trie.root.children.values())
        if not children:
            return []
        return list(self.trie.get_paths()(children[0]))

    def load_route(self, route):
        """Look up *route* and return its action with the bindings applied.

        ``Trie.find`` is expected to return an ``(action, bindings)`` pair;
        the bindings are passed to the action as keyword arguments.
        """
        action, bindings = self.trie.find(self._segments(route), {})
        return functools.partial(action, **bindings)
class Autocomplete(object):
    """Prefix completion backed by a Trie filled from ``/usr/share/dict/words``."""

    def __init__(self):
        self.root = Trie()
        # The context manager closes the word list once it has been read;
        # the original left the file handle open.
        with open("/usr/share/dict/words", "r") as word_file:
            self.dictionary = [word.replace('\n', '') for word in word_file]
        for word in self.dictionary:
            self.root.add(word)

    def autocompleteTest(self, word):
        """Yield the completions stored below the prefix *word*.

        The trie is walked one letter at a time; if any letter has no
        child node the generator finishes without yielding anything.
        """
        curr = self.root.root
        for letter in word:
            curr = curr.children.get(letter)
            if curr is None:
                return  # No words with given prefix word

        yield from curr.all_words_from_current_node(word)
Ejemplo n.º 21
0
def mkDecTrie(dct):
  """
  Makes a "decoding", or "reverse" trie from a description dict
  Only call from mkTries to ensure that the encoding trie gets built as well

  For each ``key -> value`` entry of *dct*:
    * value[1] is NoReverse: the entry is skipped (no reverse mapping);
    * value[1] has a callable ``find_prefix`` (presumably a nested trie):
      value[0] is mapped to ``(key, revTrie(value[1])) + value[2:]``;
    * anything else: value is mapped back to key.
  """
  rv=Trie()
  for key, value in dct.iteritems():
    try:
      second = value[1]
    except Exception:
      # No second element at all: a plain value, reversed directly.
      rv.add(value, key)
      continue
    if second is NoReverse:
      continue
    try:
      nested = callable(second.find_prefix)
    except Exception:
      nested = False
    if nested:
      new_key=value[0]
      new_val=(key,revTrie(second))+value[2:]
      rv.add(new_key, new_val)
    else:
      rv.add(value, key)
  return rv
Ejemplo n.º 22
0
    def testAddSubstring(self):
        """Adding prefixes of an existing word updates the counts ``find`` returns."""
        trie = Trie()
        for word in ("aberer", "ab", "a"):
            trie.add(word)

        for prefix, expected in (("a", 3), ("ab", 2), ("aberer", 1)):
            self.assertEqual(trie.find(prefix), expected)

        # A word inserted in the middle of an existing chain.
        trie.add("abe")
        self.assertEqual(trie.find("abe"), 2)
Ejemplo n.º 23
0
def mkDecTrie(dct):
    """
    Makes a "decoding", or "reverse" trie from a description dict
    Only call from mkTries to ensure that the encoding trie gets built as well

    For each ``key -> value`` entry of *dct*:
      * value[1] is NoReverse: the entry is skipped (no reverse mapping);
      * value[1] has a callable ``find_prefix`` (presumably a nested trie):
        value[0] is mapped to ``(key, revTrie(value[1])) + value[2:]``;
      * anything else: value is mapped back to key.
    """
    rv = Trie()
    for key, value in dct.iteritems():
        try:
            second = value[1]
        except Exception:
            # No second element at all: a plain value, reversed directly.
            rv.add(value, key)
            continue
        if second is NoReverse:
            continue
        try:
            nested = callable(second.find_prefix)
        except Exception:
            nested = False
        if nested:
            new_key = value[0]
            new_val = (key, revTrie(second)) + value[2:]
            rv.add(new_key, new_val)
        else:
            rv.add(value, key)
    return rv
Ejemplo n.º 24
0
def read_volunteers():
    """ Read all the volunteers in and orchestrate their transformation

    Reads ``volunteers.csv`` (skipping its first row), fetches the Slack
    user list configured in ``config.yaml``, matches the two through a trie
    keyed by lower-cased real name and writes the result to
    ``./volunteers.md``.
    """
    user_trie = Trie() # Will contain complete slack user list in JSON
    with open("volunteers.csv") as volunteers:
        reader = csv.reader(volunteers)
        # Skip the header row up front instead of building a Volunteer from
        # it and popping it afterwards; this also tolerates an empty file.
        next(reader, None)
        group = [Volunteer(line) for line in reader]
    with open("config.yaml") as config:
        # safe_load: plain yaml.load without a Loader can construct arbitrary
        # objects and is rejected by current PyYAML releases.
        settings = yaml.safe_load(config)
    for user in get_users_slack(settings["slack"]):
        if 'real_name' not in user:
            continue
        user_trie.add(user['real_name'].lower(), user)
    with open('./volunteers.md', 'w') as md_file:
        for gr in group:
            gr.parse_slack(user_trie)
            md_file.write(str(gr))
Ejemplo n.º 25
0
class TestTire(TestCase):
    """Counting, membership and sampling behaviour of ``Trie``."""

    def setUp(self):
        """Store four names; "David" is added twice."""
        self.trie = Trie()
        for name in ("David", "David", "Dave", "Davidson"):
            self.trie.add(name)

    def test_recall(self):
        """Counts for a stored name, and KeyError for an unknown one."""
        exact = self.trie.count("David")
        self.assertEqual(exact, 2)
        overlay = self.trie.overlay_count("David")
        self.assertEqual(overlay, 3)
        with self.assertRaises(KeyError):
            self.trie.count("Ben")

    def test_exists(self):
        """Only complete stored names are reported as existing."""
        self.assertFalse(self.trie.exist("Davison"))
        self.assertTrue(self.trie.exist("David"))
        self.assertFalse(self.trie.exist("Dav"))

    def test_initializer(self):
        """A Trie built from a list gives the same counts as repeated add()."""
        names = ["David", "David", "Dave", "Davidson"]
        trie = Trie(names)
        self.assertEqual(trie.count("David"), 2)
        self.assertEqual(trie.overlay_count("David"), 3)

    def test_random(self):
        """``random`` returns a (str, int) pair."""
        picked = self.trie.random()
        name, count = picked
        self.assertEqual(type(name), str)
        self.assertEqual(type(count), int)
Ejemplo n.º 26
0
 def test_allwords(self):
     """``allwords`` returns exactly the words that were added."""
     t = Trie()
     for word in ('hello', 'help', 'hell', 'hall'):
         t.add(word)
     expected = ['hello', 'help', 'hell', 'hall']
     self.assertSameSet(expected, t.allwords())
Ejemplo n.º 27
0
 def test_lookup_prefix(self):
     """``lookup`` returns only the stored words starting with the prefix."""
     t = Trie()
     for word in ('hello', 'help', 'hell', 'hall', 'harp'):
         t.add(word)
     matches = t.lookup('hel')
     self.assertSameSet(matches, ['hello', 'help', 'hell'])
Ejemplo n.º 28
0
class Trial(IApplication):
    """Application that feeds BadLink URLs into a trie and prints node counts.

    The print calls are written so that they produce the same output on
    Python 2 (where the original used print statements) and on Python 3.
    """

    def __init__(self, frame):
        self.frame = frame
        self.trie = Trie()

    def initialize(self):
        """Nothing to prepare."""
        pass

    def update(self):
        """Add every BadLink URL to the trie and print the ranked nodes.

        Prints the number of links, then ``previous count`` for each entry
        of ``Trie.flat_list`` in descending count order. ``self.done`` is
        set once at least one entry has been printed.
        """
        bls = self.frame.get(BadLink)
        print(len(bls))
        for l in bls:
            self.trie.add(l.url, None)

        ranked = sorted(Trie.flat_list, key=lambda x: x.count, reverse=True)
        for t in ranked:
            # Single pre-formatted string: same text as ``print a, b``.
            print("%s %s" % (t.previous, t.count))
        if ranked:
            self.done = True

    def shutdown(self):
        """Announce completion."""
        print("Done")
Ejemplo n.º 29
0
 def test_features(self):
     """Word count and membership after seeding the trie and adding words."""
     tr = Trie('the')
     for word in ('bar', 'batt', 'at'):
         tr.add(word)
     self.assertEqual(tr.count(), 4)
     # 'att' is only a suffix of a stored word; 'value' was never added.
     for word, present in (('att', False), ('bar', True), ('value', False)):
         self.assertEqual(tr.has(word), present)
Ejemplo n.º 30
0
def create_data_model(json_data):
    '''Create the pickled trie data model from JSON conversation data.

    The parsed JSON is cached in ``pickle_filename``; the cache is created
    from *json_data* when missing and is always the source that is read.
    Every message text is preprocessed and added to a Trie, which is
    pickled to ``pickle_data_model`` unless that file already exists.

    json_data -- path of the JSON file with an 'Issues' list whose items
                 carry a 'Messages' list of dicts with a 'Text' entry.
    '''
    if not os.path.exists(pickle_filename):
        with open(json_data) as json_file:
            sample_conversations_data = json.load(json_file)
        # Pickle streams are binary: text mode corrupts them on Windows and
        # is rejected outright on Python 3.
        with open(pickle_filename, "wb") as output_data:
            pickle.dump(sample_conversations_data, output_data)

    # load the pickle file to get the json data
    # NOTE(review): pickle.load runs arbitrary code from the file; this is
    # only acceptable because the cache is produced by this function.
    with open(pickle_filename, "rb") as input_data:
        sample_conversations_data = pickle.load(input_data)

    text_data = []  # all the sentences in the text data
    for issue in sample_conversations_data['Issues']:
        for message in issue['Messages']:
            text_data.append(string_preprocess(message['Text']))

    trie = Trie()  # trie object storing the sentences
    for sentence in text_data:
        trie.add(sentence)

    # create data model if it does not exist
    if not os.path.exists(pickle_data_model):
        with open(pickle_data_model, "wb") as data_model:
            pickle.dump(trie, data_model)
        print("data model created")
Ejemplo n.º 31
0
def tab_cmd(target, t):
    '''Command-line tab completion.

    t selects what is completed: 'cmd' and 'tag' use the prebuilt tries;
    any other value completes a filesystem path ('dir' keeps directories
    only, 'file' keeps regular files only). Returns a dict with the keys
    'code', 'res' and 'perfix'.
    '''
    perfix = target
    if t == 'cmd' or t == 'tag':
        res = tries[t].getStartBy(target)
    else:
        # Resolve the path to complete
        basename = os.path.basename(target)
        perfix = addEscape(basename)
        target = parse_path(target)
        dirname = os.path.dirname(target)
        if not os.path.isdir(dirname):
            res = []
        else:
            paths = os.listdir(dirname)
            if t == 'dir':
                paths = [
                    entry for entry in paths
                    if os.path.isdir(os.path.join(dirname, entry))
                ]
            elif t == 'file':
                paths = [
                    entry for entry in paths
                    if os.path.isfile(os.path.join(dirname, entry))
                ]

            def has_prefix(path):
                # Match the prefix, ignoring case
                return re.match(f'^{addEscape(basename)}', path, re.I)

            paths = filter(has_prefix, paths)
            path_trie = Trie()
            path_trie.add(paths)
            res = path_trie.getStartBy(basename)
    return {'code': 0, 'res': res, 'perfix': perfix}
Ejemplo n.º 32
0
    def testAddDeNovo(self):
        """Counts returned by ``find`` for words added to an empty trie."""
        trie = Trie()
        for word in ("abl", "ac", "bca", "baa"):
            trie.add(word)

        expectations = (
            ("b", 2),
            ("a", 2),
            ("abl", 1),
            ("ac", 1),
            ("bca", 1),
            ("baa", 1),
        )
        for prefix, count in expectations:
            self.assertEqual(trie.find(prefix), count)
from trie import Trie
from data import *
from welcome import *
from hashmap import HashMap
from linkedlist import LinkedList

# Printing the Welcome Message
print_welcome()

# Entering cuisine data: every cuisine in ``types`` goes into the trie (for
# lookup by name) and gets an empty LinkedList in the ``eateries`` map.
food_types = Trie()
eateries = HashMap(len(types))
for food in types:
    food_types.add(food)
    eateries.assign(food, LinkedList())

# restaurant data-point key names:
eatery_cuisine = "cuisine"
eatery_name = "name"
eatery_price = "price"
eatery_rating = "rating"
eatery_address = "address"
# Entering restaurant data: each record is indexed positionally as
# [cuisine, name, price, rating, address].
for restaurant in restaurant_data:
    # List of eateries already stored for this restaurant's cuisine.
    # NOTE(review): not used in the lines visible here -- the record built
    # below is presumably inserted into it further on; confirm.
    current_eateries_for_cuisine = eateries.retrieve(restaurant[0])
    current_eatery_data = HashMap(len(restaurant))
    current_eatery_data.assign(eatery_cuisine, restaurant[0])
    current_eatery_data.assign(eatery_name, restaurant[1])
    current_eatery_data.assign(eatery_price, restaurant[2])
    current_eatery_data.assign(eatery_rating, restaurant[3])
    current_eatery_data.assign(eatery_address, restaurant[4])
Ejemplo n.º 34
0
class TestWords(unittest.TestCase):
    """Tests for the ``words`` method of Trie"""

    def setUp(self):
        """Store seven words with the values 1 to 7."""
        unittest.TestCase.setUp(self)

        self.mytrie = Trie()
        entries = ('ant', 'ante', 'antic', 'antsy', 'antse', 'ban', 'banana')
        for value, word in enumerate(entries, 1):
            self.mytrie.add(word, value)

    def test_default_case(self):
        """Test words retrieves all words properly from Trie."""
        expected = ['ante','antic','ant','antsy','antse','banana','ban']
        actual = list(self.mytrie.words())
        # Order is not part of the contract, so compare sorted copies.
        same_words = sorted(actual)==sorted(expected)
        self.assertTrue(same_words)
Ejemplo n.º 35
0
def make_trie(items):
    """Build a Trie from (key, item) pairs, NFC-normalising both strings.

    When the module-level ``_debug`` flag is set, each pair is printed as
    ``key: item`` (UTF-8 encoded) before it is added.
    """
    trie = Trie()
    for key, item in items:
        pair = (key, item)
        if _debug:
            print(": ".join([x.encode('UTF-8') for x in pair]))
        normalized = [unicodedata.normalize('NFC', x) for x in pair]
        trie.add(*normalized)
    return trie
Ejemplo n.º 36
0
# Given a book of words, and assuming there is enough main memory to hold all of them, design a data structure to find the top K most frequently occurring words.
# The data structure should be dynamic so that new words can be added.

from trie import Trie


def find_k_most_frequent(word: str):
    """Return the most frequent words; currently a stub that returns a new empty list."""
    return []


if __name__ == '__main__':
    # Demo: fill a trie with a few words and report some lookups.
    teststr = "the a there anaswe any by their"
    # find_k_most_frequent(teststr)
    t = Trie()
    for s in teststr.split(' '):
        print("Adding({})".format(s))
        t.add(s)

    # Search for different keys, some stored and some not
    for key in ("the", "these", "their", "thaw"):
        print("{} ---- {}".format(key, t.search(key)))
Ejemplo n.º 37
0
def create_dictionary():
    """Rebuild the module-level ``dictionary`` trie from DICTIONARY_FILE.

    Each line of the file is stripped of surrounding whitespace and added
    as one word.
    """
    global dictionary
    with open(DICTIONARY_FILE) as word_file:
        dictionary = Trie()
        for line in word_file:
            entry = line.strip()
            dictionary.add(entry)