Ejemplo n.º 1
0
 def setup_method(self, method):
     """Per-test fixture: a trie pre-loaded with a small fixed word list.

     ``method`` is accepted (pytest-style hook signature) but not used.
     """
     self.mytrie = Trie('')
     for entry in ('cat', 'cab', 'dog', 'what', 'whatsup'):
         self.mytrie.add(entry)
Ejemplo n.º 2
0
def generate_table_trie(loc):
    '''Build a trie of tables from a folder of pre-formatted tables.

    Receives a folder location, wraps every table source found there in a
    Table object and inserts each table into the trie under its label (the
    label characters are the nodes, the tables are the leaves). A reverse
    trie for suffix search is generated afterwards.

    Returns the generated trie.
    '''

    print("Generating Table Trie...\n")

    # Yields the location of one source table per step
    sources = generate_loc(loc)

    # Create the trie
    trie = Trie()

    # One Table object per source
    tables = [Table(RawTable(source)) for source in sources]

    # Insert every table into the trie under its label
    for table in tables:
        insert(table.table_label, table, trie.root)

    # Fill the trie's access dictionary (and others) with the strings it holds
    trie.yield_strings(trie.root)

    print("Generating Reverse Table Trie...\n")

    # Generate the reverse trie used for suffix search
    generate_reverse_trie(trie)

    return trie
Ejemplo n.º 3
0
class Dictionary:
    """Word lookup backed by a trie, with fuzzy matching through a BK-tree."""

    def __init__(self, dict_map):
        """
        Index every entry of ``dict_map``.

        ``dict_map`` maps each word to its translation, e.g.
        dict_map['english'] = 'chinese'
        """
        self.dict_trie = Trie(dict_map)
        self.similar_tree = BK_Tree()
        for word in dict_map:
            self.similar_tree.insert(word)
        self.dict_map = dict_map

    def search(self, key):
        """Look ``key`` up in the trie and return the trie's result as is."""
        return self.dict_trie.search(key)

    def getSimilarWord(self, word, num=5, maxDistance=5):
        """Return translations for the words most similar to ``word``.

        The BK-tree is asked for its ``num`` closest matches within
        ``maxDistance``; every match is then looked up in the trie. Each
        result is a dict with the keys "word", "translation" and "distance".
        """
        assert word is not None
        matches = self.similar_tree.topKSimilar(word, num, maxDistance)
        return [
            {
                "word": match[0],
                "translation": self.dict_trie.search(match[0])[match[0]],
                "distance": match[1],
            }
            for match in matches
        ]
Ejemplo n.º 4
0
 def setUp(self):
     """Create a fresh trie and the list of sample words used by the tests."""
     self.trie = Trie()
     # Same twenty words, in the same order, written as one split string.
     self.case = (
         "A a aa aal aalii aam Aani aardvark aardwolf Aaron Aaronic "
         "Aaronite Aaronitic Aaru Ab Ababdeh Ababua abac abacay abacinate"
     ).split()
Ejemplo n.º 5
0
def getAutoComplete(query):
    """Return a JSON response holding the trie's suggestions for ``query``.

    A trie is built from ``filtered_words`` on every call; the payload has
    the keys 'query' (the input) and 'results' (the suggestions).
    """
    suggestions = Trie(filtered_words).suggestions(query)
    payload = {'query': query, 'results': suggestions}
    return jsonify(payload)
Ejemplo n.º 6
0
class TestTrie(unittest.TestCase):
    """Membership and prefix tests for ``Trie``."""

    def setUp(self):
        """Start every test with a new trie and a fixed list of sample words."""
        self.trie = Trie()
        self.case = [
            "A", "a", "aa", "aal", "aalii", "aam", "Aani", "aardvark",
            "aardwolf", "Aaron", "Aaronic", "Aaronite", "Aaronitic", "Aaru",
            "Ab", "Ababdeh", "Ababua", "abac", "abacay", "abacinate"
        ]

    def testAddContains(self):
        """Added words are members of the trie; words never added are not."""
        remaining = set(self.case)
        # A quarter of the distinct words (halved twice) gets added.
        take = (len(remaining) // 2) // 2
        added = {remaining.pop() for _ in range(take)}
        for word in added:
            self.trie.add(word)
        for word in added:
            self.assertIn(word, self.trie)
        for word in remaining:
            self.assertNotIn(word, self.trie)

    def testContainsPrefix(self):
        """Prefixes of stored words are recognised even if never stored themselves."""
        prefixes = {"A", "a", "aa", "aal", "Aaron", "Ab", "aba", "abac"}
        non_prefixes = (
            "abaciscus", "abacist", "aback", "abactinal", "Abe", "abaction"
        )
        # Store every sample word except the prefixes under test.
        for word in set(self.case).difference(prefixes):
            self.trie.add(word)
        for prefix in prefixes:
            self.assertTrue(self.trie.containsPrefix(prefix))
        for word in non_prefixes:
            self.assertFalse(self.trie.containsPrefix(word))
Ejemplo n.º 7
0
    def test_search(self):
        """
        Test Trie.search method
        """
        # Build the trie from a fixed sentence.
        trie = Trie()
        sentence = "on the banks of red cedar theres school thats known to all"
        words = sentence.split()
        for word in words:
            trie.add(word)

        # Reversed words serve as keys that were never inserted.
        reversed_words = [word[::-1] for word in words]
        for word, absent in zip(words, reversed_words):
            self.assertEqual(trie.search(word), 1)
            self.assertEqual(trie.search(absent), 0)

        # Insert the first half a second time; those counts must become 2.
        duplicates = words[:len(words) // 2]
        for word in duplicates:
            trie.add(word)
        for word, absent in zip(words, reversed_words):
            expected = 2 if word in duplicates else 1
            self.assertEqual(trie.search(word), expected)
            self.assertEqual(trie.search(absent), 0)
Ejemplo n.º 8
0
    def suggestedProducts(self, products, searchWord):
        '''
        Collect, for every prefix of ``searchWord``, the matching products.

        :param products: iterable of product names to index in a trie
        :param searchWord: word being typed; one result is produced per
            character
        :return: list with one entry per character of ``searchWord``; each
            entry is what ``self.dfs`` returns for the trie node reached by
            that prefix, or an empty list when no product has the prefix
        '''
        trie = Trie()
        for word in products:
            trie.insert(word)

        result = []
        prefix = ""
        node = trie.root

        for ch in searchWord:
            prefix += ch
            matches = []
            # Once a prefix has no branch in the trie, no longer prefix can
            # match either. Keep ``node`` at None instead of dereferencing it
            # (the unguarded lookup raised AttributeError on the next char).
            if node is not None:
                node = node.children[trie.getIndex(ch)]
            if node:
                matches = self.dfs(node, prefix)
            result.append(matches)
        return result
Ejemplo n.º 9
0
    def longestWord(self, words):
        '''
        Find the longest word that can be built one letter at a time, where
        every intermediate prefix is itself a word in ``words``.

        The words are loaded into a trie, which is walked depth-first with an
        explicit stack, descending only into children flagged as word ends.

        :param words: iterable of lowercase words
        :return: the longest such word, "" when there is none; among equally
            long candidates the first one reached (alphabetical order) wins
        '''
        trie = Trie()
        for word in words:
            trie.insert(word)

        best = ""
        pending = [(trie.root, "")]

        while pending:
            node, path = pending.pop()
            if len(path) > len(best):
                best = path

            # Push 'z' first so that 'a' is popped (visited) first.
            for offset in reversed(range(26)):
                child = node.children[offset]
                if child and child.isEndNode:
                    pending.append((child, path + chr(97 + offset)))
        return best
Ejemplo n.º 10
0
class Dictionary:
    """Dictionary lookup combining exact (trie) and approximate (BK-tree) search."""

    def __init__(self, dict_map):
        """
        Index the entries of dict_map, a mapping from word to translation
        such as dict_map['english'] = 'chinese'
        """
        self.dict_trie = Trie(dict_map)
        self.similar_tree = BK_Tree()
        for entry in dict_map:
            self.similar_tree.insert(entry)
        self.dict_map = dict_map

    def search(self, key):
        """Exact lookup of ``key``; the trie's result is returned unchanged."""
        result = self.dict_trie.search(key)
        return result

    def getSimilarWord(self, word, num=5, maxDistance=5):
        """Translate the ``num`` closest matches of ``word`` within ``maxDistance``.

        Returns a list of dicts with the keys "word", "translation" and
        "distance", in the order reported by the BK-tree.
        """
        assert word is not None
        trans = []
        for hit in self.similar_tree.topKSimilar(word, num, maxDistance):
            candidate = hit[0]
            lookup = self.dict_trie.search(candidate)
            trans.append({
                "word": candidate,
                "translation": lookup[candidate],
                "distance": hit[1],
            })
        return trans
Ejemplo n.º 11
0
def main():
    """Load a word list into a trie and run the interactive word-search loop.

    The dictionary file is given as the first command-line argument and is
    read one word per line. While the user answers ``1``, a random grid of
    lowercase letters with the requested dimensions is printed together with
    the words found in it and the elapsed time. Any other answer exits.
    """
    # A missing argument used to surface as a bare IndexError traceback.
    if len(sys.argv) < 2:
        print("Usage: pass the dictionary file name as the first argument.")
        sys.exit(1)

    words_dictionary = Trie()
    file_name = sys.argv[1]
    lowercase_letters = string.ascii_lowercase
    try:
        with open(file_name, 'r') as file_handle:
            for word in file_handle:
                words_dictionary.insert_word(word.rstrip())
    except FileNotFoundError:
        print("The file name provided doesn't exist. Try again!")
        sys.exit()

    word_search_game = WordSearch(words_dictionary)

    while True:
        try:
            cmd = int(input("Enter 1 to play else any other digit to exit! : "))
        except ValueError:
            # Non-numeric input is treated like any other "not 1" answer
            # instead of crashing with a traceback.
            sys.exit()
        if cmd != 1:
            sys.exit()

        num_rows = int(
            input("Please enter number of rows for the grid : "))
        num_cols = int(
            input("Please enter number of columns for the grid : "))
        start = time.time()
        grid = [[
            random.choice(lowercase_letters) for i in range(num_cols)
        ] for j in range(num_rows)]

        print('\n')
        for row in grid:
            print(row)

        print(word_search_game.search_valid_words(grid))
        print("\nTotal execution time : ", time.time() - start, "\n")
Ejemplo n.º 12
0
class Spell_Checker():
    """Reports words of a sentence that do not occur in a reference corpus."""

    def __init__(self, corpus_list):
        """Index every word of every text in ``corpus_list`` in a trie."""
        self.word_trie = Trie()
        for text in corpus_list:
            for token in self.text_to_words(text):
                self.word_trie.add(token)

    def text_to_words(self, text):
        """Split ``text`` into words.

        Matches of ``rep_special_chars`` become spaces and matches of
        ``rep_numbers`` are removed before splitting on whitespace.
        """
        cleaned = rep_numbers.sub('', rep_special_chars.sub(' ', text))
        return cleaned.split()

    def check(self, sentence):
        """Print the words of ``sentence`` that the trie does not contain."""
        unknown = [
            word for word in self.text_to_words(sentence)
            if not self.word_trie.exists(word)
        ]
        if not unknown:
            print('No spelling errors found')
            return
        print('Check the spelling of the following words:')
        for word in unknown:
            print(word)

    def read_check(self):
        """Interactively check sentences for as long as the user answers 'y'."""
        prompt = 'Would you like to check a sentense: (y/n)'
        while input(prompt) == 'y':
            self.check(input('Introduce the sentense you would like to check:'))
Ejemplo n.º 13
0
def findMultiplePatternMatches(seq, patterns):
    """Return the start indices of ``seq`` at which the pattern trie matches.

    A trie is built from ``patterns``; each examined start position ``i`` is
    kept when ``trie.prefix_in_trie(seq[i:], 1)`` is exactly ``True``.
    """
    trie = Trie(patterns)
    # NOTE(review): the window is sized by the LONGEST pattern, so shorter
    # patterns that start near the end of ``seq`` are never examined —
    # confirm whether that is intended.
    last_start = len(seq) - max(map(len, patterns))
    indicies = []
    for start in xrange(last_start + 1):
        if trie.prefix_in_trie(seq[start:], 1) is True:
            indicies.append(start)
    return indicies
Ejemplo n.º 14
0
def autocomplete(prefix: str, possible_queries: List[str]) -> List[str]:
    """Return the completions of ``prefix`` among ``possible_queries``.

    All queries are inserted into a trie; the trie's entry for ``prefix`` is
    then handed to ``complete_words``, whose result is returned.
    """
    index = Trie()
    for query in possible_queries:
        index.insert(query)
    # Sub-structure of the trie reached by following the prefix.
    subtree = index.find(prefix)
    return complete_words(prefix, subtree)
def trie_soln():
    """Prompt for a file name, build a trie from that file and print it.

    Every element of the created trie is printed on its own line, its items
    converted to strings and separated by single spaces.
    """
    helper = Trie()
    print("please enter filename")
    strings = helper.read_parse_file(read_console())
    for row in helper.create_trie(strings, 1, 1):
        print(" ".join(map(str, row)))
Ejemplo n.º 16
0
    def test_insert_trie_one(self):
        """A trie holding one sequence collects exactly that sequence."""
        trie = Trie()
        trie.insert([1, 2, 3])

        collected = trie.collect([])
        print(collected)
        self.assertEqual([[1, 2, 3]], collected)
Ejemplo n.º 17
0
 def __init__(self, pinyins):
     """Store ``pinyins`` and load the list of valid pinyin into a trie.

     The list is read from ``pinyin/pinyin_list.txt`` (resolved against the
     current working directory). The first whitespace-separated token of
     every non-blank line is inserted into ``self.tree``.
     """
     self.pinyins = pinyins
     # Load all valid pinyin.
     self.tree = Trie()
     # ``with`` closes the file even when reading or inserting raises.
     with open('pinyin/pinyin_list.txt') as f:
         for line in f:
             fields = line.split()
             if fields:  # a blank line used to raise IndexError
                 self.tree.insert(fields[0])
Ejemplo n.º 18
0
 def __init__(self, dict_map):
     """
     Build the lookup structures for ``dict_map``.

     ``dict_map`` maps every word to its translation, e.g.
     dict_map['english'] = 'chinese'
     """
     self.dict_trie = Trie(dict_map)
     fuzzy_index = BK_Tree()
     for word in dict_map:
         fuzzy_index.insert(word)
     self.similar_tree = fuzzy_index
     self.dict_map = dict_map
Ejemplo n.º 19
0
def word_search_ii(grid, words):
	"""Search ``grid`` for ``words`` and return the set filled by the search.

	The words are added to a trie created over the characters with codes
	97..122. Every grid cell is then used as a starting point for
	``dfs_with_trie``, which receives the shared result set (presumably it
	adds each word it finds — see that helper).
	"""
	dictionary = Trie(map(chr, xrange(97, 123)))
	for word in words:
		dictionary.add(word)

	found_words = set()
	row_count = len(grid)
	for row in xrange(row_count):
		col_count = len(grid[0])
		for col in xrange(col_count):
			dfs_with_trie(row, col, grid, dictionary.root, found_words)
	return found_words
Ejemplo n.º 20
0
    def __init__(self, regex=r"[\w]+"):
        """Count the words of the English word file and index them in a trie.

        ``self.WORDS`` maps every token produced by ``self.words_token`` to
        its number of occurrences; every distinct token is inserted into
        ``self.trie``. ``regex`` is stored on the instance as given.
        """
        self.trie = Trie()
        self.regex = regex

        # Tally how often each token occurs in the word file.
        with open(english_words, "r") as source:
            tokens = self.words_token(source.read())
            self.WORDS = Counter(tokens)

        # Every distinct word goes into the trie.
        for word in self.WORDS:
            self.trie.insert(word)
Ejemplo n.º 21
0
    def test_prefix_trie_matching_pattern_not_found(self):
        """No match is reported when none of the patterns prefixes the text."""
        trie = Trie(4)
        for pattern in ('ATC', 'CAT', 'CGTA'):
            trie.insert_key(pattern, len(pattern))

        matches = prefix_trie_matching('CGCAGTAACA', trie)
        self.assertEqual([], matches)
Ejemplo n.º 22
0
    def test_prefix_trie_matching(self):
        """Every stored pattern that prefixes the text is reported, shortest first."""
        trie = Trie(4)
        stored = ('CGCA', 'CGCAGT', 'CGC', 'C')
        for key in stored:
            trie.insert_key(key, len(key))

        expected = ['C', 'CGC', 'CGCA', 'CGCAGT']
        self.assertEqual(expected, prefix_trie_matching('CGCAGTAACA', trie))
Ejemplo n.º 23
0
    def test_splitWord(self):
        """``_splitWord`` breaks a word into its individual characters."""
        # given
        trie = Trie()

        # when / then
        assert trie._splitWord("word") == ["w", "o", "r", "d"]
Ejemplo n.º 24
0
def load_trie():
    """Build a trie from the lines of the bundled word file.

    Each line is stripped and inserted; the number of lines read is printed
    before the trie is returned.
    """
    trie = Trie()
    count = 0
    with open("成语俗语.txt", encoding='utf-8') as f:
        for count, line in enumerate(f, start=1):
            trie.insert(line.strip())
    print("word num:", count)
    return trie
Ejemplo n.º 25
0
 def test_searching(self):
     """A key is found only once it was added, even if a prefix of it exists."""
     trie = Trie()
     short_key, long_key = "semir", "semiramida"

     trie.add_key(short_key)
     self.assertTrue(trie.in_tree(short_key))
     self.assertFalse(trie.in_tree(long_key))

     trie.add_key(long_key)
     self.assertTrue(trie.in_tree(long_key))
Ejemplo n.º 26
0
    def test_len_contains_empty(self):
        """
        Test len(Trie), in Trie and Trie.empty methods
        """
        # A new trie reports itself as empty.
        trie = Trie()
        self.assertTrue(trie.empty())

        sentence = "on the banks of red cedar theres school thats known to all"
        words = sentence.split()
        # Reversed words serve as keys that are never inserted.
        reversed_words = [word[::-1] for word in words]

        def check_state(expected_len):
            """Assert length, non-emptiness and membership for the current trie."""
            self.assertEqual(len(trie), expected_len)
            self.assertFalse(trie.empty())
            for present, absent in zip(words, reversed_words):
                self.assertTrue(present in trie)
                self.assertFalse(absent in trie)

        for word in words:
            trie.add(word)
        check_state(len(words))

        # Duplicates count towards the length but do not change membership.
        duplicates = words[:len(words) // 2]
        for word in duplicates:
            trie.add(word)
        check_state(len(words) + len(duplicates))
Ejemplo n.º 27
0
 def __init__(self, filename):
     """
     Load the data file and set up the lookup structures.

     :param filename: string, passed unchanged to ``self.loadFromFile``.
     :return: None
     """
     # Load first: presumably build_Tree() below consumes what this reads — TODO confirm.
     self.loadFromFile(filename)
     self.tree = Trie()
     # Missing keys of ``store`` start out as empty lists.
     self.store = defaultdict(list)
     self.build_Tree( )
     # Rebind the class-level registry on Person to a new empty list.
     Person._registry = []
Ejemplo n.º 28
0
def corpus2table(data_path, table_path=None, lang=None, n_suffixes=300):
    """Build a prefix-by-suffix count table from a text corpus.

    Every line of ``data_path`` is tokenized with ``word_tokenize``; matches
    of ``non_word_pattern`` are removed from each token, and the lowercased
    remainder followed by ``#`` is inserted into a trie. The trie's
    prefix/suffix tree is turned into a DataFrame (rows: prefixes, columns:
    the ``n_suffixes`` most frequent suffixes). Only the columns whose entropy
    lies above the 0.9 quantile of all column entropies are kept.

    Args:
        data_path: path of the UTF-8 corpus file.
        table_path: when given, the resulting table is written there as CSV.
        lang: when given, a scatter plot of column entropy against log
            frequency is saved under a file name containing ``lang``.
        n_suffixes: number of most common suffixes used as columns
            (defaults to the previously hard-coded 300).

    Returns:
        The filtered DataFrame.
    """
    trie = Trie()

    with open(data_path, 'r', encoding='utf-8') as inp:
        for line in inp:
            for w in word_tokenize(line):
                w = non_word_pattern.sub('', w)
                if not w:
                    continue
                trie.insert(f'{w.lower()}#')
    prefix_suffix_tree = trie.get_prefix_suffix_tree()

    print('Tree constructed')

    prefixes = sorted(prefix_suffix_tree.keys())
    suffix_counts = Counter()
    for v in prefix_suffix_tree.values():
        for k, count in v.items():
            suffix_counts[k] += count

    # Take the N most common suffixes
    sorted_counts = suffix_counts.most_common(n_suffixes)
    suffixes = [el[0] for el in sorted_counts]
    freqs = [el[1] for el in sorted_counts]

    d = pd.DataFrame(index = prefixes, columns = suffixes, dtype = int).fillna(0)
    # Set membership replaces the repeated ``suffix in d.columns`` check;
    # the columns are exactly ``suffixes``.
    kept_suffixes = set(suffixes)
    for prefix, suffix_counts_for_prefix in prefix_suffix_tree.items():
        print(prefix)
        for suffix, count in suffix_counts_for_prefix.items():
            if suffix in kept_suffixes:
                d.loc[prefix,suffix] = count

    print('Dataframe constructed')

    entropies = d.apply(entropy)

    if lang is not None:
        # Regress entropies on log frequencies
        fig = plt.figure(figsize=(16,10))
        plt.scatter(np.log(freqs), entropies, marker = 'o')
        plt.savefig(f'/home/macleginn/Analyses/bible-tables/img/entropies_log_freqs_{lang}.png')
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)

    cutoff = np.quantile(entropies, 0.9)
    d = d.loc[:,entropies > cutoff]

    print('Columns selected')

    if table_path is not None:
        d.to_csv(table_path)

    return d
Ejemplo n.º 29
0
    def test_search_key_not_in_trie(self):
        """Searching for a key that was never inserted yields None."""
        trie = Trie()
        keys = ('their', 'there', 'answer', 'any', 'bye')
        for value, key in enumerate(keys, start=1):
            trie.insert_key(key, value)

        self.assertIsNone(trie.search_key('hello'))
Ejemplo n.º 30
0
class Dictionary(object):
	"""Word list read from a text file and indexed in a trie."""

	def __init__(self, dictionary_path="./dictionary.txt"):
		"""Read one stripped word per line from ``dictionary_path`` and build the trie."""
		super(Dictionary, self).__init__()
		loaded = []
		with open(dictionary_path) as f:
			for line in f:
				loaded.append(line.strip())
		self.words = loaded
		self.trie = Trie(self.words)

	def is_prefix(self, prefix):
		"""Return the trie's ``in_trie`` answer for ``prefix``."""
		return self.trie.in_trie(prefix)

	def is_valid_word(self, prefix):
		"""Return the trie's ``is_valid_word`` answer for ``prefix``."""
		return self.trie.is_valid_word(prefix)
Ejemplo n.º 31
0
def main():
    """Exercise Trie construction, lookup and deletion on a fixed word list.

    Prints the words that could not be found and the words whose deletion
    was reported as unsuccessful, asserts that both lists agree, and
    returns 0.
    """
    words = [
        'shocking', 'jeans', 'groan', 'employ', 'milky', 'supply', 'silk',
        'lean', 'brawny', 'peace', 'destruction', 'notice', 'apple', 'app',
        'apps', 'self', 'loops', 'error', 'dynamic',
    ]

    # Initialize the trie with the words (initialization uses Trie.add_word).
    trie = Trie(words)

    # Testing Trie.delete_word: remove two of the stored words up front.
    for removed in ('shocking', 'apps'):
        trie.delete_word(removed)

    # Testing Trie.add_word & Trie.find_word: collect every probe not found.
    words_2 = ['sunny', 'abc', 'zorro'] + words + ['eve', 'zebra']
    excluded_words = [w for w in words_2 if not trie.find_word(w)]
    print(excluded_words)

    # Testing Trie.delete_word: collect every probe whose deletion fails.
    false_deletion = []
    for w in words_2:
        deleted = trie.delete_word(w)
        if not deleted:
            false_deletion.append(w)
    print(false_deletion)

    # Words that were absent must be exactly the ones that fail to delete.
    assert excluded_words == false_deletion, 'FAIL/CORRUPT DELETION in TRIE.'

    return 0
Ejemplo n.º 32
0
    def replaceWords(self, dict, sentence):
        """Replace every word of ``sentence`` by the trie's match for it.

        The entries of ``dict`` are inserted into a trie. For each
        whitespace-separated word the result of
        ``get_first_word_in_item(word)`` is used when it is truthy, otherwise
        the word is kept. The words are re-joined with single spaces.
        """
        t = Trie()
        words = sentence.split()
        for entry in dict:
            t.insert(entry)
        return " ".join(t.get_first_word_in_item(word) or word for word in words)
Ejemplo n.º 33
0
def load_trie(loc):
    '''Receives a file holding an object written by save_trie and returns a trie built from its data'''
    # NOTE(review): dill.load can run arbitrary code while unpickling —
    # only load files from trusted sources.
    with open(loc, 'rb') as fh:
        data_dict = dill.load(fh)

    # Build the trie from the unpickled dictionary
    t = Trie()
    for key, value in data_dict.items():
        insert(key, value, t.root)

    t.yield_strings(t.root)
    # Return the trie
    return t
Ejemplo n.º 34
0
    def trie_test(self):
        """The trie yields what was added, and ``find`` narrows to a prefix."""
        items = ["potato", "pots", "buckle"]

        trie = Trie()
        for item in items:
            trie.add(item)
        # The trie must yield exactly the items that were put in, in order.
        self.assertEqual(items, list(trie.next()))

        # find("pot") must lead to potato and pots, and NOT to buckle.
        node = trie.find("pot")
        containing_words = list(node.terminals("pot"))
        self.assertEqual(len(containing_words), 2)
        for expected in ("potato", "pots"):  # order does not matter
            self.assertIn(expected, containing_words)
        self.assertNotIn("buckle", containing_words)
Ejemplo n.º 35
0
 def __init__(self, pinyins):
     """Keep ``pinyins`` and index all valid pinyin in a trie.

     Reads ``pinyin/pinyin_list.txt`` (relative to the current working
     directory) and inserts the first whitespace-separated token of each
     non-blank line into ``self.tree``.
     """
     self.pinyins = pinyins
     # Read in all valid pinyin.
     self.tree = Trie()
     # The context manager closes the file even if an insert raises.
     with open('pinyin/pinyin_list.txt') as f:
         for line in f:
             tokens = line.split()
             if not tokens:
                 # Blank lines previously raised IndexError; skip them.
                 continue
             self.tree.insert(tokens[0])
Ejemplo n.º 36
0
 def __init__(self, dict_map):
     """
     Set up exact and fuzzy lookup for ``dict_map``, a mapping from word to
     translation such as dict_map['english'] = 'chinese'
     """
     self.dict_trie = Trie(dict_map)
     bk_tree = BK_Tree()
     for entry in dict_map:
         bk_tree.insert(entry)
     self.similar_tree = bk_tree
     self.dict_map = dict_map
Ejemplo n.º 37
0
def main():
    """Fill a 4x4 Boggle board with random lowercase letters and solve it.

    Words are read from ``wordsList.csv``; only entries matching
    ``^[a-z]*$`` are stored in the trie, each with its length as value.
    """
    from string import ascii_lowercase
    from random import choice
    import csv
    import re

    board = BoggleBoard()
    side = 4
    for row in range(side):
        for col in range(side):
            board.put((row, col), choice(ascii_lowercase))

    trie = Trie()
    words = []
    with open("wordsList.csv", 'r') as f:
        for record in csv.reader(f):
            words.extend(record)

    lowercase_only = re.compile(r"^[a-z]*$")
    for word in words:
        if lowercase_only.match(word):
            trie.put(word, len(word))

    BoggleSolver(board, trie).solve()
Ejemplo n.º 38
0
	def findLongestWord(self):
		"""Print the two longest words of ``self.word`` that are built from other words.

		Each key is split at every prefix the trie already knows; the
		remaining suffix is queued. A key counts as combined as soon as one
		of its queued suffixes is itself in the trie, otherwise the suffix is
		split further. Prints the two longest combined words with their
		lengths and the number of combined words. Returns None.
		"""

		word = self.word
		trie = Trie()
		queue = deque()

		# Queue (key, remainder) for every known prefix of the key, then store the key itself.
		# Only keys inserted earlier can act as prefixes of the current key.
		for key in word: # NOTE(review): earlier comment claimed "from longest to shortest", but nothing is sorted here — confirm the order of self.word
			prefixes = trie.getAllPrefix(key)
			for pf in prefixes:
				queue.append((key, key[len(pf):]))
			trie.insert(key)

		# Track the two longest combined words found so far (longest first).
		longest_word = ['','']
		flag = 2 # NOTE(review): never read afterwards
		dic = {} # keys already recognised as combined words

		while queue: 
			key,suffix = queue.popleft()
			if key not in dic and suffix in trie:
				# The remainder is a word itself, so the key is made of other words.
				dic[key] = True
				if len(key) > len(longest_word[0]):
					longest_word[1] = longest_word[0]
					longest_word[0] = key
				elif len(key) > len(longest_word[1]):
					longest_word[1] = key
			else:
				# Split the remainder further at each of its known prefixes.
				# This branch also runs for keys that are already in ``dic``.
				prefixes = trie.getAllPrefix(suffix)
				for pf in prefixes:
					queue.append((key, suffix[len(pf):]))

		# Print the result (Python 2 print statements).
		print "longest_word 1 are ", longest_word[0], ', length is ',len(longest_word[0])
		print "longest_word 2 are ", longest_word[1], ', length is ',len(longest_word[1])
		print "total words can be combined by other words are", len(dic)
	 	

	 	return
Ejemplo n.º 39
0
 def __init__(self):
   """Set up an empty 15x15 board, the word dictionary and the multipliers.

   Every tile starts with the "single" multiplier and every coordinate is
   listed as empty (row-major order) before ``init_multipliers`` runs.
   """
   self.multipliers, _letters, self.letter_values = load.load_game_properties()
   self.turn_num = 0
   self.word_dict = Trie("../textfiles/wwf.txt")
   self.BOARD_SIZE = 15
   size = self.BOARD_SIZE
   self.tiles = [
     [Tile(self.multipliers["single"]) for _col in xrange(size)]
     for _row in xrange(size)
   ]
   self.empty_coords = [(row, col) for row in xrange(size) for col in xrange(size)]
   self.init_multipliers(self.multipliers)
Ejemplo n.º 40
0
        def __init__(self, tk_master, trie_file_location="trie.json"):
            """Build the autocomplete window and load the trie from a JSON file.

            :param tk_master: Tk root/parent widget that hosts the frames and menu.
            :param trie_file_location: path handed to ``Trie.from_JSON_file``.
            """
            self.tk_master = tk_master
            self.tk_master.minsize(width=350, height=250)
            self.trie_file_location = trie_file_location

            # Two frames side by side plus a menu bar attached to the master.
            self.input_frame = Tk.Frame(self.tk_master)
            self.button_frame = Tk.Frame(self.tk_master)
            self.menu = Tk.Menu(tk_master)
            self.tk_master.config(menu=self.menu)

            self.input_frame.grid(row=0, column=0, sticky="n")
            self.button_frame.grid(row=0, column=1)

            self.menu.add_command(label="Save to file", command=self.save_trie)

            # Text field; every key release triggers autocomplete_last_word.
            self.autocomplete_field = Tk.Text(self.input_frame, width=20)
            self.autocomplete_field.grid(row=0, column=0)
            self.autocomplete_field.bind("<KeyRelease>", self.autocomplete_last_word)

            # NOTE(review): this button is created in input_frame and never
            # placed with grid()/pack() here, and button_frame stays empty —
            # confirm whether it is laid out elsewhere.
            self.button = Tk.Button(self.input_frame, text="Add words", command=self.add_word)

            self.trie = Trie.from_JSON_file(trie_file_location)
class Lexer:
    """Character-driven lexer implemented as a deterministic finite automaton.

    States used by the transition table:
        0  start
        1  identifier that began with an uppercase letter
        2  identifier that began with a lowercase letter
        3  integer literal
        4  inside a string literal
        5  string literal closed
        6  just after a ``~`` inside a string literal
        7  one of ``(``, ``)``, ``;``
        8  put-back: the character ends the current token and is re-read
        9  error

    Completed words are handed to ``self.Trie``; ``driver`` returns one
    formatted token per call.
    """

    # Largest value accepted for a five-digit integer literal.
    _MAX_INT = 65534

    def __init__(self):
        """Open ``input3.txt`` and initialise the automaton."""
        self.file = open("input3.txt")
        self.Trie = Trie()
        self.putback_bool, self.putback_val = False, ""
        self.current_state, self.current_word = 0, ""
        self.previous_word = ""
        self.previous_state = 0
        self.states = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}        # 8 is putback and 9 is error
        self.starting_state = {0}
        self.accepting_states = {1, 2, 3, 5, 7}
        self.delta = self._build_delta()

    @staticmethod
    def _build_delta():
        """Return the transition table mapping (state, character) to the next state."""
        uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        lowercase = "abcdefghijklmnopqrstuvwxyz"
        numbers = "0123456789"
        whitespace = "\t\n "
        invalids = "?-_+|,."

        delta = {}
        for letter in uppercase:
            delta[(0, letter)] = 1
            delta[(4, letter)] = 4
            delta[(6, letter)] = 4
            # Move to putback/error state i.e. State 8/9
            delta[(3, letter)] = 8
            for state in (1, 2, 7):
                delta[(state, letter)] = 9

        for letter in lowercase:
            delta[(0, letter)] = 2
            delta[(1, letter)] = 1
            delta[(2, letter)] = 2
            delta[(4, letter)] = 4
            delta[(6, letter)] = 4
            # Move to putback/error state i.e. State 8/9
            delta[(3, letter)] = 8      # e.g. 0a  check
            delta[(7, letter)] = 9      # e.g. ;b  error

        for number in numbers:
            delta[(0, number)] = 3
            delta[(3, number)] = 3
            delta[(6, number)] = 4
            # Move to putback/error state i.e. State 8/9
            for state in (1, 2):
                delta[(state, number)] = 8
            delta[(7, number)] = 9      # e.g. ;0 error

        for space in whitespace:
            delta[(0, space)] = 0
            for state in (4, 6):
                delta[(state, space)] = 4

        for character in invalids:
            for state in (0, 4, 5):
                delta[(state, character)] = 9
            for state in (1, 2, 3, 6, 7):
                delta[(state, character)] = 8

        for key in ((0, '"'), (6, '~'), (6, '"')):
            delta[key] = 4
        delta[(4, '"')] = 5
        delta[(4, '~')] = 6
        for key in ((0, ')'), (0, '('), (0, ';')):
            delta[key] = 7
        return delta

    def transition_state(self, input):
        """Advance the automaton by one character.

        Characters without a transition lead to the error state (9). While an
        integer is being read (state 3), the digits collected so far must
        still pass ``check_max_int``; otherwise the error state is entered.
        """
        key = (self.current_state, input)
        if key not in self.delta:
            self.current_state = 9
            return
        self.previous_state = self.current_state
        if self.current_state == 3 and not self.check_max_int():
            self.current_state = 9
        else:
            self.current_state = self.delta[key]

    def run_char(self, input):
        """Feed one character to the lexer.

        Returns the kind of the token completed by this character ("ID",
        "INT", "STRING", "LPAR", "RPAR", "SEMICOLON" or "ERROR"), or None
        when the token is still incomplete.
        """
        if input in ('\t', '\n', ' ', '') and self.current_state != 4:
            # Whitespace outside a string ends the current token.
            if self.current_state == 3 and not self.check_max_int():
                self.resetTrackedVariables()
                return "ERROR"
            identifier = self.checkIdentifier(self.current_state)
            if identifier != "ERROR":
                self.Trie.proccessWord(self.current_word)
            self.resetTrackedVariables()
            return identifier

        if self.putback_bool:
            return None

        self.transition_state(input)
        if self.current_state != 8:
            # Not a put-back: extend the current word. A "~" is only kept
            # when it moved the automaton into state 6.
            if input != "~" or self.current_state == 6:
                self.current_word = self.current_word + input
            return None

        # Put-back state: remember the character, then emit the token that
        # was complete before it arrived.
        self.set_putback(input)
        identifier = self.checkIdentifier(self.previous_state)
        self.Trie.proccessWord(self.current_word)
        self.resetTrackedVariables()
        return identifier

    def driver(self):
        """Return the next formatted token, or None at end of input.

        The file stays open between calls so that ``driver`` can be called
        repeatedly. Returning from inside ``with self.file`` closed the file
        after the first token, so every later call failed on a closed file.
        The file is closed at end of input or when an error propagates.
        """
        # NOTE(review): a final token that is not followed by whitespace is
        # still dropped at end of input, as before — confirm intent.
        try:
            while True:
                if self.putback_bool:
                    identifier = self.run_char(self.putback())
                else:
                    c = self.file.read(1)
                    if not c:
                        self.file.close()
                        return None
                    identifier = self.run_char(c)
                if identifier is None:
                    continue
                token = self._format_token(identifier)
                if token is not None:
                    return token
        except BaseException:
            self.file.close()
            raise

    def _format_token(self, identifier):
        """Render the token text for ``identifier`` (None for unknown kinds)."""
        if identifier == 'ID':
            return '<' + identifier + ',' + str(self.Trie.checkWordExists(self.previous_word)) + '>'
        if identifier == 'STRING':
            return '<' + identifier + ',' + str(list(self.previous_word)) + '>'
        if identifier == 'INT':
            return '<' + identifier + ',' + self.previous_word + '>'
        if identifier in ('LPAR', 'RPAR', 'SEMICOLON'):
            return '<' + identifier + ', 0 >'
        if identifier == 'ERROR':
            return '<' + identifier + '>'
        return None

    def in_accepting_state(self, state):
        """Return True when ``state`` is one of the accepting states."""
        return state in self.accepting_states

    def resetTrackedVariables(self):
        """Return to the start state, remembering the word just finished."""
        self.current_state = 0
        self.previous_word = self.current_word
        self.current_word = ""

    def putback(self):
        """Hand back the stored put-back character and clear the flag."""
        self.putback_bool = False
        return self.putback_val

    def set_putback(self, character):
        """Store ``character`` so that it is processed again on the next step."""
        self.putback_bool = True
        self.putback_val = character

    def checkIdentifier(self, state):
        """Map ``state`` to a token kind.

        Returns "ERROR" for non-accepting states and None when state 7 holds
        a word other than ``(``, ``)`` or ``;``.
        """
        if not self.in_accepting_state(state):
            return "ERROR"
        if state in (1, 2):
            return "ID"
        if state == 3:
            return "INT"
        if state == 5:
            return "STRING"
        if state == 7:
            punctuation = {'(': 'LPAR', ')': 'RPAR', ';': 'SEMICOLON'}
            return punctuation.get(self.current_word)
        return None

    def check_max_int(self):
        """Return True while the current word is an acceptable integer literal.

        Words shorter than five characters always pass, longer ones never
        do, and five-digit words must not exceed 65534.
        """
        word = self.current_word
        if len(word) != 5:
            return len(word) < 5
        return int(word) <= self._MAX_INT
 def __init__(self):
     """Create the object with a new ``Trie`` stored as ``self.lexicon``."""
     self.lexicon = Trie()
class Vocabulary:
    """A word collection stored in a ``Trie``, with word-ladder search on top."""

    def __init__(self):
        self.lexicon = Trie()

    def next(self):
        """Delegate to the underlying trie's ``next()``."""
        return self.lexicon.next()

    def __contains__(self, item):
        """Return True when the trie's ``find(item)`` returns a node."""
        return self.lexicon.find(item) is not None

    def fetch(self, path):
        """Load words from the given file into the vocabulary.

        Every whitespace-separated token on every line is added as a word.
        :param path: The path to the vocabulary file.
        """
        with open(path, 'r') as document:
            for line in document:
                for word in line.split():
                    self.lexicon.add(word)

    def word_ladder(self, origin, destination):
        """Constructs a word ladder between the given words using the fetched vocabulary. A word ladder is a sequence of
        words, from origin to destination, where each intermediary word changes exactly one letter in the previous word.
        All intermediate words in the ladder must be real words.
        A tree data structure stores the words reached so far and the paths between them. The tree is rooted at the
        destination word and traversed breadth first, adding each word's legal one character substitutions. Traversal
        ends when any path has reached the origin and that path's ancestry is returned.
        The tree is traversed breadth first so that the shortest path is found in all cases. The tree begins at the
        destination and works backwards to the origin so that the chosen path's ancestry is in the correct order.
        :param origin: The starting word to construct a word ladder from.
        :param destination: The word that the ladder traverses to.
        :return: A sequence of words that constitutes a word ladder, or an empty list when no ladder exists.
        """
        paths = Tree()                        # tree stores all possible paths
        paths.add_root(destination)           # start at destination so that ancestry path is in the correct order
        # No branch needs to revisit a word another branch has reached. The root counts as reached too, so it is
        # never re-added as a (useless) leaf below one of its own neighbours.
        visited = {destination}

        for node in paths.breadth_first():
            if node.data == origin:
                # Reached the origin: the ladder is this node's ancestry.
                return list(node.ancestor_data())
            for word in self.similar(node.data):
                if word not in visited:
                    node.add(word)
                    visited.add(word)
        return []                             # no path was found

    def similar(self, word):
        """Yield candidate words that differ from the given word by a single character substitution.

        Nothing is yielded when the word itself is not in the vocabulary.
        :param word: A word to find similar words to.
        """
        if word not in self:
            # A bare return ends the generator; raising StopIteration inside a generator is converted into a
            # RuntimeError by PEP 479 (Python 3.7+).
            return
        last_position = len(word) - 1
        for switch_position in range(len(word)):
            prefix = word[:switch_position]
            suffix = word[switch_position + 1:]
            walker = self.lexicon.find(prefix)
            # Each child is a possible character substitution. A valid child is one that contains the remaining
            # original characters from the given word
            for key, child in walker.children.items():
                # Don't include the original character as a valid choice (compare by value, not identity)
                if key == word[switch_position]:
                    continue
                if switch_position == last_position:
                    # on the last letter of 'word' every sibling is offered
                    yield prefix + key
                elif suffix in child:
                    # otherwise the child must contain the remaining original characters from word
                    yield prefix + key + suffix
Ejemplo n.º 44
0
#! /usr/bin/env python

# Writer: wuhanghao
# Date: 2016.2.1

from Trie import Trie

with open(r'.\text.txt') as f:
	txt = f.read().decode('gb2312')

pattern = Trie()
rst = isMatched, lastPos, leng = pattern.match(txt)
print rst
if isMatched:
	print 'pattern "%s" found' % txt[lastPos-leng:lastPos]
else:
	print 'pattern notfound'
'''
You're given a dictionary of strings, and a key. Check if the key is composed of an arbitrary number of concatenations of strings from the dictionary. For example: 

dictionary: "world", "hello", "super", "hell" 
key: "helloworld" --> return true 
key: "superman" --> return false 
key: "hellohello" --> return true
'''
from Trie import Trie

# Dictionary strings that a key may be assembled from.
words = ["world", "hello", "super", "hell" ]
trie = Trie()

# Insert every dictionary word with the value 1; search() tests node data
# against 1 once the key is exhausted.
for word in words:
    trie.insert(word, 1)

def search(root, key, new_start = False):
    """Check ``key`` against the trie node ``root``.

    NOTE(review): the body is cut off in this file after the trailing
    comment below, so only the early-exit cases are visible here.

    :param root: Trie node to search from; ``None`` yields ``False``.
    :param key: Remaining characters still to be matched.
    :param new_start: When true, the first character of ``key`` must be a
        child of ``root`` or the search fails immediately.
    :return: ``False`` for the failing cases above; when ``key`` is empty,
        whether ``root.data == 1``.
    """
    
    if root == None:
        return False
    
    # NOTE(review): key[0] is evaluated before the empty-key check below, so
    # an empty key combined with new_start=True raises IndexError.
    if new_start:
        if not root.children.get(key[0], None):
            return False
    
    # Key exhausted: succeed only when this node carries the value 1.
    if(len(key) == 0):
        if root.data == 1:
            return True
        return False
        
    #Since we still have characters left, we search for the child node using the next
 def setUp(self):
     # Fixture: one untouched trie and one pre-loaded with two key/value pairs.
     self.empty = Trie()
     self.t = Trie()
     
     self.t.add('bob',2)
     self.t.add('apple', 3)
Ejemplo n.º 47
0
 def setUp(self):
     # Fixture: a WordReader pointed at one text file, and a Trie built from it.
     self.lukija = WordReader()
     # test addFileNames
     self.lukija.addFileNames(["../../Material/The Adventures of Tom Sawyer by Mark Twain.txt"])
     self.trie = Trie(self.lukija)
class TestTrie(unittest.TestCase):
    """Unit tests for the key/value ``Trie``: insert, lookup, update and removal."""

    def setUp(self):
        """Create an empty trie and one holding 'bob' -> 2 and 'apple' -> 3."""
        self.empty = Trie()
        self.t = Trie()

        self.t.add('bob',2)
        self.t.add('apple', 3)

    def testEmpty(self):
        """An empty trie reports 'Empty' when traversed."""
        self.assertEqual(self.empty.traverseWords(),'Empty')

        print('\ntestEmpty PASSED')

    def testInsert(self):
        """Words show up in the traversal in the order they were added."""
        self.empty.add('bob',2)
        self.assertEqual(self.empty.traverseWords(),'bob')

        self.empty.add('apple', 3)
        self.assertEqual(self.empty.traverseWords(),'bob apple')

        print('\ntestInsert PASSED')

    def testIsMember(self):
        """Stored words are members; prefixes, extensions and strangers are not."""
        stored = ('bob', 'apple')
        self.assertTrue(all(self.t.isMember(word) for word in stored))

        missing = ('bo', 'bobo', 'ap', 'dave')
        self.assertFalse(any(self.t.isMember(word) for word in missing))

        print('\ntestIsMember PASSED')

    def testCommonPrefix(self):
        """Words sharing a prefix ('at' / 'ate') can be added and removed independently."""
        trie = self.t

        trie.add('at',5)
        self.assertEqual(trie.traverseWords(),'bob apple at')
        self.assertEqual(trie.getValue('at'), 5)
        self.assertTrue(trie.isMember('at'))

        # Adding the longer word must leave the shorter one untouched.
        trie.add('ate',7)
        self.assertEqual(trie.getValue('ate'), 7)
        self.assertTrue(trie.isMember('ate'))

        self.assertEqual(trie.getValue('at'), 5)
        self.assertTrue(trie.isMember('at'))

        self.assertEqual(trie.traverseWords(),'bob apple at ate')

        # Removing the prefix word keeps the longer word.
        trie.remove('at')
        self.assertEqual(trie.traverseWords(),'bob apple ate')

        self.assertFalse(trie.isMember('at'))
        self.assertEqual(trie.getValue('at'),None)

        self.assertTrue(trie.isMember('ate'))
        self.assertEqual(trie.getValue('ate'),7)

        # Re-adding the prefix word with a new value.
        trie.add('at',6)
        self.assertEqual(trie.getValue('at'), 6)
        self.assertTrue(trie.isMember('at'))

        self.assertEqual(trie.traverseWords(),'bob apple at ate')

        # Removing the longer word keeps the prefix word.
        trie.remove('ate')
        self.assertEqual(trie.traverseWords(),'bob apple at')

        self.assertTrue(trie.isMember('at'))
        self.assertEqual(trie.getValue('at'),6)

        self.assertFalse(trie.isMember('ate'))
        self.assertEqual(trie.getValue('ate'),None)

        print('\ntestCommonPrefix PASSED')

    def testRemove(self):
        """Removing words one at a time shrinks the traversal down to 'Empty'."""
        self.t.add('add',5)
        self.assertTrue(self.t.isMember('add'))

        self.assertEqual(self.t.traverseWords(), 'bob apple add')

        removals = (
            ('apple', 'bob add'),
            ('add', 'bob'),
            ('bob', 'Empty'),
        )
        for victim, remaining in removals:
            self.assertTrue(self.t.remove(victim))
            self.assertEqual(self.t.traverseWords(), remaining)

        print('\ntestRemove PASSED')

    def testUpdateValue(self):
        """updateValue reports success and the new value becomes readable."""
        accepted = (('bob', 10), ('apple', 12), ('app', 1))
        for word, value in accepted:
            self.assertTrue(self.t.updateValue(word,value))
            self.assertEqual(self.t.getValue(word), value)

        # A word the trie has never seen is rejected and stays absent.
        self.assertFalse(self.t.updateValue('dave',12))
        self.assertEqual(self.t.getValue('dave'), None)

        print('\ntestUpdateValue PASSED')

    def testGetValue(self):
        """getValue returns stored values and None for prefixes or unknown words."""
        expectations = (('bob', 2), ('apple', 3), ('bo', None), ('dave', None))
        for word, expected in expectations:
            self.assertEqual(self.t.getValue(word), expected)

        print('\ntestGetValue PASSED')
Ejemplo n.º 49
0
class Board:
  """Square board of tiles with bonus multipliers, a word dictionary and cross-check data."""

  def _set_mirrored(self, i, j, value):
    """Set multiplier ``value`` on (i, j) and its three mirror images across the board's centre lines."""
    far_i = self.BOARD_SIZE - (i+1)
    far_j = self.BOARD_SIZE - (j+1)
    self.tiles[i][j].set_multiplier(value)
    self.tiles[far_i][j].set_multiplier(value)
    self.tiles[i][far_j].set_multiplier(value)
    self.tiles[far_i][far_j].set_multiplier(value)

  def init_multipliers(self, multipliers):
    """Place the bonus multipliers on the board.

    Each listed coordinate is applied together with its transpose and all
    mirror images; the centre tile receives ``multipliers["start"]`` last.

    multipliers -- mapping from bonus name ("double letter", "double word",
                   "triple letter", "triple word", "start") to the value
                   handed to ``Tile.set_multiplier``.
    """
    mult_loc = {}
    mult_loc["double letter"] = [(1,2), (2,4), (4,6)]
    mult_loc["double word"] = [(1,5), (3,7)]
    mult_loc["triple letter"] = [(0,6), (3,3), (6,0)]
    mult_loc["triple word"] = [(0,3), (3,0)]
    for mult in mult_loc:
      for i,j in mult_loc[mult]:
        self._set_mirrored(i, j, multipliers[mult])
      # Same coordinates again with row and column swapped.
      for j,i in mult_loc[mult]:
        self._set_mirrored(i, j, multipliers[mult])
    # Floor division keeps the index an int on both Python 2 and Python 3.
    centre = self.BOARD_SIZE // 2
    self.tiles[centre][centre].set_multiplier(multipliers["start"])

  def __init__(self):
    """Load the game properties and dictionary, then build an empty 15x15 board."""
    self.multipliers, letters, self.letter_values = load.load_game_properties()
    self.turn_num = 0
    self.word_dict = Trie("../textfiles/wwf.txt")
    self.BOARD_SIZE = 15
    self.tiles = []
    self.empty_coords = []
    for i in xrange(self.BOARD_SIZE):
      self.tiles.append([])
      for j in xrange(self.BOARD_SIZE):
        self.tiles[i].append(Tile(self.multipliers["single"]))
        self.empty_coords.append((i,j))
    self.init_multipliers(self.multipliers)

  def within_bounds(self, coords):
    """Return True when both components of ``coords`` lie in [0, BOARD_SIZE)."""
    return 0 <= coords[0] < self.BOARD_SIZE and 0 <= coords[1] < self.BOARD_SIZE

  def print_board(self):
    """Print the board with row and column indices; empty tiles show their multiplier.

    NOTE(review): the print statements below rely on Python 2 syntax
    (trailing-comma print); they are left untouched.
    """
    for i in xrange(self.BOARD_SIZE):
      if i == 0:
        print("").rjust(3),
        for j in xrange(self.BOARD_SIZE):
          print (str(j)).rjust(3),
        print('\n')
      for j in xrange(self.BOARD_SIZE):
        if j == 0:
          print(str(i)).rjust(3),
        if self.tiles[i][j].get_letter() is None:
          print(self.tiles[i][j].get_multiplier()).rjust(3),
        else:
          print (self.tiles[i][j].get_letter()).rjust(3),
      print ('\n')

  def get_next_in_direction(self, coord, direction, orient):
    """Return ``coord`` moved one step along ``direction``; ``orient`` is +1 or -1 at every call site here."""
    return (coord[0] + orient * direction[0], coord[1] + orient * direction[1])

  def compute_cross_checks(self):
    """Recompute, for every empty coordinate and both directions, which letters form a valid cross word.

    For each empty square the letters already placed immediately before and
    after it (along the direction) are collected; every alphabet letter is
    then tried in the gap and looked up in the dictionary. The resulting
    bit array and the accumulated score are stored on the tile.
    """
    directions = [(0,1), (1,0)]
    # NOTE(review): one bitarray instance is reused for every tile and
    # direction -- confirm that Tile.fill_cross_check copies it.
    curr_array = bitarray(26)
    curr_array.setall(False)
    alphabet = string.lowercase
    for coord in self.empty_coords:
      for direction in directions:
        score = 0
        # Walk backwards over the placed tiles that precede the square.
        curr_coord = self.get_next_in_direction(coord, direction, -1)
        left_word = ""
        while self.within_bounds(curr_coord) and curr_coord not in self.empty_coords:
          tile = self.get_tile(curr_coord)
          left_word = tile.get_letter() + left_word
          # NOTE(review): the letter value is multiplied by is_wild_card();
          # verify this is not meant to be the negation.
          score += self.letter_values[tile.get_letter()] * tile.is_wild_card()
          curr_coord = self.get_next_in_direction(curr_coord,direction, -1)
        # Walk forwards over the placed tiles that follow the square.
        right_word = ""
        curr_coord = self.get_next_in_direction(coord, direction, 1)
        while self.within_bounds(curr_coord) and curr_coord not in self.empty_coords:
          tile = self.get_tile(curr_coord)
          right_word = right_word + tile.get_letter()
          score += self.letter_values[tile.get_letter()] * tile.is_wild_card()
          curr_coord = self.get_next_in_direction(curr_coord,direction,1)
        if left_word != "" or right_word != "":
          for i, letter in enumerate(alphabet):
            curr_array[i] = self.word_dict.word_exists(left_word + letter + right_word)
        else:
          # No neighbours along this direction: every letter is allowed.
          curr_array.setall(True)
        self.get_tile(coord).fill_cross_check(direction, curr_array)
        self.get_tile(coord).set_cross_check_score(direction, score)

  def place_letter(self, letter, coords):
    """Put ``letter`` on the tile at ``coords`` and drop ``coords`` from the empty list."""
    self.tiles[coords[0]][coords[1]].set_letter(letter)
    self.empty_coords.remove(coords)

  def get_adjacent_placed_tiles(self, coords):
    """Return the in-bounds orthogonal neighbours of ``coords`` that already hold a letter."""
    adjacent_tiles = []
    # abs(i) != abs(j) keeps exactly the four orthogonal offsets.
    for x,y in [(coords[0]+i, coords[1]+j) for i in [-1,0,1] for j in [-1,0,1] if abs(i) != abs(j)]:
      if self.within_bounds((x,y)) and self.get_tile((x,y)).get_letter() is not None:
        adjacent_tiles.append((x,y))
    return adjacent_tiles

  def get_turn(self):
    """Return the current turn number."""
    return self.turn_num

  def advance_turn(self):
    """Increase the turn number by one."""
    self.turn_num += 1

  def get_start_pos(self):
    """Return the coordinates of the centre tile."""
    # Floor division keeps the coordinates ints on Python 3 as well.
    return (self.BOARD_SIZE // 2, self.BOARD_SIZE // 2)

  def get_dict(self):
    """Return the word dictionary."""
    return self.word_dict

  def get_tile(self,coords):
    """Return the tile stored at ``coords`` (row, column)."""
    return self.tiles[coords[0]][coords[1]]

  def get_empty_coords(self):
    """Return the list of coordinates that hold no letter yet."""
    return self.empty_coords

  def get_letter_value(self, letter):
    """Return the value stored for ``letter`` in the loaded letter values."""
    return self.letter_values[letter]
Ejemplo n.º 50
0
class pinyin(object):
    """Splits an unbroken pinyin string into individual syllables using a trie of valid pinyin."""

    def __init__(self, pinyins):
        """Store the string to split and load the valid-pinyin list into a trie.

        @param pinyins: the pinyin string to be split
        """
        self.pinyins = pinyins
        # Load every valid pinyin syllable (first field of each line).
        self.tree = Trie()
        # The with-block closes the file even if an insert raises.
        with open('pinyin/pinyin_list.txt') as f:
            for line in f:
                fields = line.split()
                if fields:              # skip blank lines
                    self.tree.insert(fields[0])

    def split(self):
        '''
        Split ``self.pinyins`` into syllables.

        @return: ``(True, syllables)`` on success, where ``syllables`` is a
                 list of str; ``(False, None)`` when no valid split is found.
                 The text still being built when the input ends is always
                 appended, so the list may end with an empty string.
        '''
        # Letters that may start a pinyin syllable.
        pinyin_initials = ('a', 'b', 'e', 'p', 'm', 'f', 'd',
                           't', 'n', 'l', 'g', 'k', 'h', 'j',
                           'q', 'x', 'r', 'z', 'c', 's', 'y', 'w')
        iuv = ('i', 'u', 'v')
        grn = ('g', 'r', 'n')

        current = ''
        result = []

        for c in self.pinyins:
            # Consume the next character.
            current += c
            # i/u/v can never be the first letter of a syllable.
            if c in iuv and len(current) == 1:
                return False, None
            # Still a valid pinyin, or the beginning of one: keep reading.
            if self.tree.find_initial_with(current):
                continue
            # c can start a syllable.
            if c in pinyin_initials:
                # Break before c, provided the text before it is valid.
                if self.tree.find_initial_with(current[:-1]):
                    result.append(current[:-1])
                    current = current[-1:]
                    continue
                else:
                    return False, None
            # The character before c is g, r or n, which may belong to either side.
            elif current[-2:-1] in grn:
                # Prefer breaking in front of the g/r/n ...
                if self.tree.find_initial_with(current[:-2]):
                    result.append(current[:-2])
                    current = current[-2:]
                    continue
                # ... otherwise break right after it.
                elif self.tree.find_initial_with(current[:-1]):
                    result.append(current[:-1])
                    current = current[-1:]
                    continue
                # NOTE(review): when neither break is valid the loop simply
                # goes on with the invalid text -- confirm this is intended.
            else:
                # Nothing else applies: emit the text on its own.
                result.append(current)
                current = ''

        result.append(current)

        return True, result
    def __init__(self):
        """Open the input file and set up the DFA: states, start state, accepting states and delta.

        State 8 is the put-back state and state 9 is the error state.
        """
        self.file = open("input3.txt")
        self.Trie = Trie()

        # Scanner bookkeeping.
        self.putback_bool = False
        self.putback_val = ""
        self.current_state = 0
        self.current_word = ""
        self.previous_word = ""
        self.previous_state = 0

        # The automaton itself.
        self.states = set(range(10))
        self.starting_state = {0}
        self.accepting_states = {1, 2, 3, 5, 7}

        # Input alphabet, grouped by character class.
        uppercase = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        lowercase = set("abcdefghijklmnopqrstuvwxyz")
        numbers = set("0123456789")
        whitespace = set("\t\n ")
        invalids = set("?-_+|,.")

        # One row per character class: {source state: target state}.
        # Targets 8 and 9 are the put-back and error states.
        # NOTE(review): digits have no transition out of state 4 although
        # letters and whitespace do -- confirm whether that is intended.
        class_rules = (
            (uppercase, {0: 1, 4: 4, 6: 4, 3: 8, 1: 9, 2: 9, 7: 9}),
            (lowercase, {0: 2, 1: 1, 2: 2, 4: 4, 6: 4, 3: 8, 7: 9}),
            (numbers, {0: 3, 3: 3, 6: 4, 1: 8, 2: 8, 7: 9}),
            (whitespace, {0: 0, 4: 4, 6: 4}),
            (invalids, {0: 9, 4: 9, 5: 9, 1: 8, 2: 8, 3: 8, 6: 8, 7: 8}),
        )

        delta = {}
        for symbols, targets in class_rules:
            for symbol in symbols:
                for source, target in targets.items():
                    delta[(source, symbol)] = target

        # Individual characters: quotes, the '~' character and punctuation.
        delta.update({
            (0, '"'): 4,
            (6, '~'): 4,
            (6, '"'): 4,
            (4, '"'): 5,
            (4, '~'): 6,
            (0, ')'): 7,
            (0, '('): 7,
            (0, ';'): 7,
        })
        self.delta = delta
Ejemplo n.º 52
0
class  PyTrieTestCases(unittest.TestCase):
    """Tests for a ``Trie`` that is fed by a ``WordReader``."""

    def setUp(self):
        """Create a reader pointed at one text file and a trie built on top of it."""
        reader = WordReader()
        self.lukija = reader
        # test addFileNames
        reader.addFileNames(["../../Material/The Adventures of Tom Sawyer by Mark Twain.txt"])
        self.trie = Trie(reader)

    def tearDown(self):
        """Clear reader and trie, then drop both references."""
        self.lukija.clear('all')
        self.trie.clear()
        self.lukija = self.trie = None

    def testSimpleAddFind(self):
        """ Add some objects to Trie and see if you can find them """
        for entry in WordsToAdd:
            self.trie.add(entry[0], entry[1:]) # Add words to Trie

        found = []
        for entry in WordsToAdd:
            word = entry[0]
            # Look up where the trie says this word is located
            positions, _, _ = self.trie.find(word)
            first = positions[0]
            # Rebuild a tuple shaped like the input entries so the two
            # lists can be compared directly
            found.append((word, first[0], first[1]))
        self.assertEqual(found, WordsToAdd,
                         'Trie: Did not find all words that were supposed to add')

    def testMultiWordFind(self):
        """A word added several times reports every one of its positions."""
        for entry in MultiWordAdd:
            self.trie.add(entry[0], entry[1:]) # Add words to Trie

        positions, _, _ = self.trie.find('a')
        self.assertEqual(positions, MultiWordFindA,
                         'Trie: Error finding multiple instances of a word')
        positions, _, _ = self.trie.find('b')
        self.assertEqual(positions, MultiWordFindB,
                         'Trie: Error finding multiple instances of a word')


    def testWordCounter(self):
        """ Tests that both the reader and the tree can count the words """
        reader = self.lukija
        reader.clear('all')
        reader.addFileNames(["../../Material/50words_in_UTF-8.txt"])
        self.assertEqual(reader.wordcount, 0,
                         'Trie: WordReader clearing failed')
        reader.readWords()
        self.assertEqual(reader.wordcount, 50,
                         'Trie: WordReader failed in reading words')

        trie = self.trie
        trie.clear()
        trie.addFromReader()
        self.assertEqual(trie.wordCount, 50,
                         'Trie: word counting failed')
                         'Trie: word counting failed')
Ejemplo n.º 53
0
	def __init__(self, dictionary_path="./dictionary.txt"):
		super(Dictionary, self).__init__()
		with open(dictionary_path) as f:
			self.words = [word.strip() for word in f]
		self.trie = Trie(self.words)