class TrieController():
    """Controller wiring a Trie word store to a TrieDemoView.

    Builds the trie from ``words.txt`` at construction time and pushes
    suggestion lists to the view whenever the entered text changes.
    """

    def __init__(self):
        self.trie = Trie()
        self.initTrie()
        self.view = TrieDemoView()
        # Re-query the trie every time the user edits the text field.
        self.view.textEntered.connect(self.handleTextChanged)

    def handleTextChanged(self, stringEntered):
        """Look up completions for the current text and show them in the view."""
        if len(stringEntered) == 0:
            return
        # Python 3 print call (the original used Python 2 print statements,
        # which are a SyntaxError under Python 3).
        print("word list for " + stringEntered)
        # One suggestion per line for the view.
        text = ""
        for word in self.trie.getWordList(str(stringEntered)):
            text += word + "\n"
        self.view.setSuggestions(text)

    def initTrie(self):
        """Populate the trie from the bundled ``words.txt`` file."""
        # Context manager releases the file handle (the original leaked it).
        i = 0
        with open("words.txt", 'r') as f:
            for line in f:
                self.trie.addWord(line)
                i += 1
        print("added " + str(i) + " words")

    def getView(self):
        """Return the managed TrieDemoView instance."""
        return self.view
def __init__(self, parent, *args, **kwargs):
    """Build the spell-checker editor frame inside *parent*.

    Creates the text widget, the menus, the mouse/keyboard bindings and
    the text tags used to mark misspelt or found words.
    """
    ttk.Frame.__init__(self, parent, *args, **kwargs)
    self.root = parent
    # Initialize the Trie (dictionary)
    self.dictionary = Trie()
    # Initialize text widget (100x40 characters)
    self.text = Text(self.root, width=100, height=40)
    self.text.grid(column=0, row=2)
    # Initialize the menu
    self.init_gui()
    # Bind events on the text widget: space triggers a spell check,
    # right-click (Button-3) offers autocorrect, and every key press
    # refreshes typing suggestions.
    self.text.bind("<space>", self.spell_check)
    self.text.bind('<Button-3>', self.autocorrect)
    self.text.bind('<Key>', self.typing_suggestions)
    # Initialize the filename to empty string (no file open yet)
    self.filename = ''
    # Initialize the underline tag for misspelt words
    self.text.tag_configure('underline', foreground='red', underline=True)
    # Initialize the highlight/unhighlight tags for found words
    self.text.tag_configure('highlight', background='red')
    self.text.tag_configure('unhighlight', background='white')
    # Initialize pop-up menu for right click
    self.pop_menu = tkinter.Menu(parent, tearoff=0)
    # Initialize pop-up menu for text statistics
    self.pop_stats = tkinter.Menu(parent, tearoff=0)
def create_trie( post_list ):
    """Build a character trie over every distinct word in *post_list*.

    *post_list* maps document IDs to the lists of words in each document.
    Returns the root Trie.Node; children are kept sorted by letter so
    Trie.find can binary-search them.
    """
    vocabulary = {w for doc_words in post_list.values() for w in doc_words}
    root = Trie.Node('', [], [])
    for word in vocabulary:
        node = root
        last = len(word) - 1
        for depth, ch in enumerate(word):
            idx = Trie.find(node.children_letters, ch)
            if idx == -1:
                # Unseen branch: create a child, keeping letters sorted.
                insert_at = bisect.bisect_left(node.children_letters, ch)
                bisect.insort(node.children_letters, ch)
                child = Trie.Node(ch, [], [])
                node.children_nodes.insert(insert_at, child)
                node = child
            else:
                # Path already exists; descend into it.
                node = node.children_nodes[idx]
            if depth == last:
                node.is_terminal = True
    return root
def GetASResult(query):
    """Return up to five auto-corrected variants of *query*.

    Non-stop-word tokens not found in the trie are replaced by their
    closest dictionary matches (five shortest), expanding the candidate
    result strings combinatorially; recognised tokens pass through.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for elapsed-time measurement.
    start = time.perf_counter()
    query = query.strip().lower()
    tokens = nltk.word_tokenize(query)
    result = [""]
    for token in tokens:
        token = token.lower()
        if token not in stops:
            data = Trie.GetNearestMatchFromTrie(wnl.lemmatize(token))
            if not data[0]:
                # No exact match: take the 5 shortest near matches and
                # append each to every partial result built so far.
                possiblewords = sorted(
                    Trie.GetListofWords(data[4], 0, 3),
                    key=lambda x: len(x[0]))[:5]
                newresult = []
                for item in result:
                    for word in possiblewords:
                        newresult.append(item + " " + word[0])
                result = newresult[:]
            else:
                for i in range(len(result)):
                    result[i] += " " + token
        else:
            # Stop words are kept verbatim.
            for i in range(len(result)):
                result[i] += " " + token
    for i in range(len(result)):
        result[i] = result[i].lstrip()
    timetaken = time.perf_counter() - start
    print("Fetched in ", timetaken, " secs")
    return result[:5]
def __init__(self):
    """Sentiment lexicon: positive/negative/feature phrase lists indexed in a trie.

    Bug fixes versus the original source: missing commas triggered
    implicit string concatenation ("unbeatable" "charge" became
    "unbeatablecharge" and "heavy" "issue" became "heavyissue"), and
    " fabulous" carried a stray leading space — none of those words
    could ever match a review token.
    """
    self.positives = [
        "good", "gud", "well", "great", "decent", "amazing", "excellent",
        "sexy", "superb", "suburb", "awesome", "awsm", "nice", "happy",
        "high", "average", "fast", "quick", "immersing", "immersive",
        "premium", "best", "better", "perfect", "perfection", "beast",
        "great", "fantastic", "faster", "fabulous", "blazing", "loved",
        "love", "marvellous", "comfortable", "unbeatable", "charge",
        "smooth", "beautifully", "beautiful", "superb"]
    self.negatives = [
        "very bad", "bad", "disappointment", "wrong", "never", "slow",
        "no", "not good", "not work", "not", "terrible", "heavy",
        "issue", "defect", "slowest", "lags", "waste", "doesn't work",
        "doesnt work", "doesnt", "doesn't", "problem", "sucks", "worst",
        "pathetic", "not good", "not very good", "ineffective", "poor",
        "not success"]
    self.features = [
        "phone", "phones", "device", "product", "mobile", "look",
        "front camera", "back camera", "rear camera", "camera", "selfie",
        "front", "photos", "pictures", "video", "images",
        "lowlight pictures", "lowlight", "fingerprint", "fingerprints",
        "finger print", "finger lock", "fingerlock", "finger", "touch",
        "display", "hd", "design", "build", "performance",
        "gorilla glass", "screen", "super amoled", "amoled", "notch",
        "gorrilla glass", "battery", "backup", "charging", "charge",
        "charger", "connectivity", "network", "face unlock", "face",
        "face recognition", "sound", "headset", "headphones", "audio",
        "speakers", "speaker", "water resistance", "water",
        "notifications light", "notification light",
        "adaptive brightness sensor", "sensor", "call quality", "call",
        "nfc", "wifi", "bluetooth", "other devices", "one ui", "one-ui",
        "ui", "os", "color os", "coloros", "miui", "pubg", "gaming",
        "games", "color", "ram", "memory", "heating", "price", "cost",
        "value for money", "budget", "affordable", "processor",
        "chipset", "cpu", "speed", "bloatware", "microsd", "micro sd",
        "storage", "weight"]
    # STOPWORDS: remove every lexicon word from NLTK's stop list so it
    # is not filtered out of review text before matching.
    self.stopwords = nltk.corpus.stopwords.words('english')
    for pos in self.positives:
        if pos in self.stopwords:
            self.stopwords.remove(pos)
    for neg in self.negatives:
        if neg in self.stopwords:
            self.stopwords.remove(neg)
    for ftr in self.features:
        if ftr in self.stopwords:
            self.stopwords.remove(ftr)
    # Index every phrase under its category label.
    self.trie = Trie()
    for pos in self.positives:
        self.trie.insert(pos, 'Positive')
    for neg in self.negatives:
        self.trie.insert(neg, 'Negative')
    for ftr in self.features:
        self.trie.insert(ftr, 'Features')
def init():
    """Attach tips and pinyin-initial spellings to every question, then index them in the trie."""
    for ques in allQues:
        zh = ques.getTitle()
        ques.setTips(zh)
        # Abbreviate the title to the first letter of each pinyin syllable.
        initials = "".join(syllable[0] for syllable in lazy_pinyin(zh))
        ques.setSpell(initials)
    # Second pass: register each question's number under its spelling.
    for ques in allQues:
        Trie.addTrieOne(ques.getNum(), ques.getSpell())
def GetResult(query, rankingType):
    """Rank repositories matching the lemmatized tokens of *query*.

    Returns the ranked result list, or None (after printing "No results")
    when no token is found in the trie.
    """
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    # The original also built an unused `Set(tokens)` — `Set` is the
    # Python 2 `sets.Set` and raises NameError on Python 3, so it is gone.
    start = time.perf_counter()
    query = query.strip().lower()
    tokens = [wnl.lemmatize(token) for token in nltk.word_tokenize(query)]
    result = []
    finaltoken = []
    for i, word in enumerate(tokens):
        data = Trie.GetNearestMatchFromTrie(word)
        # data[0]    -> bool result
        # data[1][0] -> number of repositories found in
        # data[1][1] -> list of repositories; each x is (name, freq, wtfidf)
        if data[0]:
            result.append((word, data[1][1], i))
            finaltoken.append(word)
    if result != []:
        # Ranking based on TF-IDF / Cosine similarity.
        finalresult = resultsList(
            GetTopLibraries(
                rankResults(normalize(conflatedDocids(result, rankingType)))))
        timetaken = time.perf_counter() - start
        print("Fetched in ", timetaken, " secs")
        return (finalresult)
    timetaken = time.perf_counter() - start
    print("Fetched in ", timetaken, " secs")
    print("No results")
def creattree():
    """Build a Trie containing every word returned by opendict()."""
    blendssearch = Trie.Trie()
    # Iterate the words directly instead of indexing via range(len(...)).
    for word in opendict():
        blendssearch.insert(word)
    return blendssearch
def tests():
    """Smoke-test the Trie: empty root, then the exact shapes produced by add().

    Each node is a dict mapping a letter to a two-element list
    [is_terminal, children]; a terminal leaf's children slot is the
    literal set {None}.
    """
    # test trie
    trie = Trie.Trie()
    assert trie.root == {}
    # test add: "ate" — builds the a -> t -> e spine, 'e' terminal
    assert trie.add("ate") == {
        'a': [False, {
            't': [False, {
                'e': [True, {None}]
            }]
        }]
    }
    # "atom" shares the existing a -> t spine and branches at 'o'
    assert trie.add("atom") == {
        'a': [
            False,
            {
                't': [
                    False,
                    {
                        'e': [True, {None}],
                        'o': [False, {
                            'm': [True, {None}]
                        }]
                    }
                ]
            }
        ]
    }
    print("tests passed")
def testAutocomplete_AlmostPrefix_Present_SuggestPartial_2(self):
    """Completions for 'fores' must be exactly the three 'fore*' words."""
    trie = Trie.Trie()
    for entry in prefix2:
        trie.insert(entry, trie.root)
    expected = Counter(["foreshadow", "foresight", "foreseeable"])
    self.assertCountEqual(Counter(trie.autocomplete("fores")), expected)
def visit(self, p, alpha, newEventSet, i, k, Q):
    """Extend trie node *p* with a child for each new event and record the
    grown episodes in *Q*; mark *p* non-left-open if *alpha* is already in *Q*.

    Bug fix: the original debug loop was `for ep in Q: print(Q)`, which
    printed the entire set once per element instead of each episode.
    """
    for e in newEventSet:
        # Skip events already present among p's children.
        comparison = any(child.event == e for child in p.children)
        if comparison:
            if self.debug:
                print('Already have')
        else:
            q = Trie(e, k + 1)
            p.children.append(q)
            if self.debug:
                print("New Trie:")
                print(p)
                print("alpha.events:" + str(alpha.events))
            # Build the new episode path: alpha's events plus this event.
            qAlphaPath = alpha.events.copy()
            if self.debug:
                print("qAlphaPath:" + str(qAlphaPath))
            qAlphaPath.append(e)
            qAlpha = Episode(qAlphaPath, Window(i, k + 1))
            Q.add(qAlpha)
            if self.debug:
                print("New Q:")
                for ep in Q:
                    print(ep)
    # If alpha itself is already represented in Q, p is not left-open.
    contained = any(str(ep.events) == str(alpha.events) for ep in Q)
    if contained:
        p.isLO = False
    if self.debug:
        print("P:")
        print(p)
def lookup():
    """Prompt the user for a string and report whether the TST contains it."""
    target = input("Enter a string for me to lookup: ")
    if TST.lookup(target):
        print("String found!!!")
    else:
        print("String NOT found!!!")
def test_complexity(self):
    """A populated trie must answer membership queries faster than a flat list.

    Fix: the original bound the second build() result to the name `list`,
    shadowing the builtin; renamed to `word_list`.
    """
    empty_tree = Trie.Trie()
    empty_list = []
    test_set = create_test_set(100)
    tree, word_list = build(empty_tree, empty_list, 100000)
    tree_time = tree_complexity(tree, test_set)
    list_time = list_complexity(word_list, test_set)
    self.assertTrue(list_time > tree_time)
def suggestions():
    """Flask endpoint: return trie completions for the ?q= query parameter."""
    # Receiving the parameters from the GET request.
    queryParams = request.args.get('q')
    # request.args.get returns None when the parameter is absent; the
    # original `== ""` comparison missed that case and crashed later.
    if not queryParams:
        return 'null'
    # Build a full trie from the corpus.
    trie = Trie()
    for sentence in service_data:
        trie.addSentence(sentence)
    # Produce the suggestion list for the supplied query.
    result = trie.generate_completions(queryParams)
    return jsonify(Completions=result)
def compressao(entrada, saida):
    """Compress the text of *entrada*, writing the encoded output to *saida*.

    Fix: the output file was opened without a context manager (leaked on
    error) and the input file was redundantly re-closed inside its `with`.
    """
    with open(entrada, 'r') as origem:
        texto = origem.read()
    trie = Trie.Trie()
    with open(saida, 'wb') as arq_saida:
        # insert() consumes the whole text starting at index 0 and writes
        # the encoded stream into arq_saida.
        trie.insert(texto, 0, arq_saida)
def main():
    """Load the dictionary, index every key in a trie, and run the solver."""
    trie = Trie.Trie()
    for key in parse_file('dictionary.txt'):
        trie.insert(key)
    solution(trie)
def LoadIndex(filename):
    """Load a JSON term index from *filename* and add every term to the trie.

    Fix: `json.load(open(...))` leaked the file handle; a context manager
    closes it deterministically. Dead commented-out progress code removed.
    """
    with open(filename, "r", encoding="utf8") as f:
        jsonData = json.load(f)
    for term in jsonData:
        Trie.AddKeyToTrie(term, jsonData[term])
    # Progress marker on the caller's console (no newline).
    print("=", sep="", end="")
def main():
    """Extra-credit demo: sample one random word from the trie and print it."""
    root = Trie.TrieNode()  # Create the root node
    root.build()
    os.system("cls")
    print("This is the extra-credit.")
    # Keep sampling until at least one random word has been collected.
    while not Trie.randomWord:
        root.randomLookUp()
    chosen = random.choice(Trie.randomWord)
    print()
    root.printRandomBetter(chosen)
def get_palindromes(filename, length, function, regex):
    """Find multi-word palindromes from the words in *filename*.

    Calls *function* on every completed palindrome. *length* bounds the
    recursion depth, *regex* filters which dictionary words participate.
    Fixes: Python 2 print statement, bare `except`, and manual file
    handling replaced by a context manager; the regex is compiled once.
    """
    def generate(root, remainder, front, depth):
        # Recursively extend `root` until `remainder` is fully consumed.
        if depth <= 0:
            return
        if remainder == "":
            function(root)
            return
        if front:
            # [ root | remainder]
            # [merwen | redniamer + root + remainder]
            candidates = backward.find_from_pref(remainder)
            for danc in candidates:
                newroot = reverse(danc) + reverse(remainder) + " " + root
                generate(newroot, danc, False, depth - 1)
            # [ root | remainder ]
            # [mer + root + rem | ainder]
            for i in range(len(remainder) + 1):
                if backward.is_word(remainder[0:i]):
                    newroot = reverse(remainder[0:i]) + " " + root
                    newrem = remainder[i:]
                    generate(newroot, newrem, True, depth - 1)
        else:
            # [remainder | root]
            # [remainder + root + redniamder | newrem]
            candidates = forward.find_from_pref(remainder)
            for cand in candidates:
                newroot = root + " " + remainder + cand
                generate(newroot, cand, True, depth - 1)
            # [remainder | root ]
            # [remain | der + root + red]
            for i in range(1, len(remainder) + 1):
                if forward.is_word(remainder[0:i]):
                    newroot = root + " " + remainder[0:i]
                    newrem = remainder[i:]
                    generate(newroot, newrem, False, depth - 1)
        return

    try:
        dictionary = open(filename, 'r')
    except OSError:
        # Narrowed from a bare except, which hid unrelated errors.
        print("Error: invalid dict file.")
        sys.exit()
    pattern = re.compile(regex)  # compile once instead of per word
    with dictionary:
        forward = Trie()
        backward = Trie()
        for word in dictionary:
            if pattern.match(word):
                forward.insert(word.strip())
                backward.insert(word[::-1].strip())
    # Seed generation from every word, split at each palindromic edge.
    for word in forward.find_words():
        for i in range(len(word)):
            if is_palindrome(word[0:i]):
                generate(word, word[i:], True, length)
        for i in reversed(range(len(word))):
            if is_palindrome(word[i:]):
                generate(word, reverse(word[0:i]), False, length)
def test_is_left_of_edge(self):
    """is_left_of_edge: a query is 'left' iff its bits order strictly before the edge key."""
    trie = Trie.Tree(8)
    trie.extend([9, 12, 44, 108, 110, 111])
    # ## test edge 44
    current = trie.root.left.left.right
    self.assertEqual(current.edge, word(0b101100, 6))
    self.assertEqual(current.key, word(0b00101100, 8))
    # 00000000 is left of 00101100
    q = word(0b00000000, 8)
    result = current.is_left_of_edge(q)
    self.assertTrue(result)
    # 00101011 is left of 00101100
    q = word(0b00101011, 8)
    result = current.is_left_of_edge(q)
    self.assertTrue(result)
    # 00101100 is NOT left of 00101100 (equal value is not strictly left)
    q = word(0b0000101100, 8)
    result = current.is_left_of_edge(q)
    self.assertFalse(result)
    # 00101111 is NOT left of 00101100
    q = word(0b00101111, 8)
    result = current.is_left_of_edge(q)
    self.assertFalse(result)
    # ## test edge 0-11011-00 of 108
    current = trie.root.left.right
    self.assertEqual(current.edge, word(0b11011, 5))
    self.assertEqual(current.key, word(0b011011, 6))
    # 00000000 is left of 011011
    q = word(0b00000000, 8)
    result = current.is_left_of_edge(q)
    self.assertTrue(result)
    # 01101000 is left of 011011
    q = word(0b01101000, 8)
    result = current.is_left_of_edge(q)
    self.assertTrue(result)
    # 01101100 is not left of 011011
    q = word(0b01101100, 8)
    result = current.is_left_of_edge(q)
    self.assertFalse(result)
    # 01110111 is not left of 011011
    q = word(0b01110111, 8)
    result = current.is_left_of_edge(q)
    self.assertFalse(result)
def test_insertion():
    """Test behavior of trie: insert a word list (with duplicates) and return the tree."""
    tree = Trie.Trie()
    for word in ("monkey", "monkeybusiness", "banana", "monkey",
                 "bananas", "bananas", "m"):
        tree.insert(word)
    return tree
def __init__(self, documentPath, dictionaryFilePath):
    """Load a document into a rope and a dictionary word list into a trie.

    documentPath: file whose text becomes the editable document.
    dictionaryFilePath: word-list file consumed via self.yieldWords.
    """
    # Create Trie to store dictionary words
    self.dictionaryTrie = Trie.Trie()
    # Create rope structure for document (filled in by the call below)
    self.document = None
    self.populateDocumentWithInputText(documentPath)
    # Insert dictionary words in Trie
    for word in self.yieldWords(dictionaryFilePath):
        self.dictionaryTrie.insert(word, self.dictionaryTrie.root)
    # Create an empty rope object to later store pasted text
    self.paste_text = ropes.Rope("")
def load_data():
    """Read csv_data/data.csv and index each row (joined, lowercased) in a trie."""
    with open('csv_data/data.csv', 'r') as f:
        reader = csv.reader(f, delimiter=',')
        headers = next(reader)  # skip the header row
        data = list(reader)
    trie = Trie.TrieNode()
    for row in data:
        # Drop one stray space/blank cell if present (matches list.remove,
        # which only removes the first occurrence).
        if ' ' in row:
            row.remove(' ')
        if '' in row:
            row.remove('')
        trie.insert(' '.join(row).strip().lower())
    return trie
def testAutocomplete_LongWord_NotPresent_1(self):
    """A 20-char random letter suffix after 'inter' must never autocomplete."""
    trie = Trie.Trie()
    for entry in prefix1:
        trie.insert(entry, trie.root)
    # ASCII letters only: 65-90 and 97-122 (codes 91-96 are punctuation).
    letters = [i for i in range(65, 123) if i not in list(range(91, 97))]
    longEnd = "".join(chr(choice(letters)) for _ in range(20))
    self.assertEqual(trie.autocomplete("inter" + longEnd), None)
def testAutocomplete_NotPrefix_Not_Present_5(self):
    """A letter that cannot start any word in prefix5 yields no completions."""
    trie = Trie.Trie()
    for entry in prefix5:
        trie.insert(entry, trie.root)
    # Any ASCII letter except the prefix's first letter in either case
    # (codes 91-96 are the punctuation between 'Z' and 'a').
    excluded = list(range(91, 97)) + [
        ord(prefixes[4][0].upper()), ord(prefixes[4][0].lower())
    ]
    random_letter = choice([i for i in range(65, 123) if i not in excluded])
    self.assertEqual(trie.autocomplete(chr(random_letter)), None)
def test_total_count(self):
    """total_count tracks added words; the empty string is not counted."""
    trie = Trie.Trie()
    self.assertEqual(trie.total_count(), 0)
    for word in ('wonder', 'happy', 'beautiful'):
        trie.add_word(word)
    self.assertEqual(trie.total_count(), 3)
    trie.add_word('')  # adding the empty word must not change the count
    self.assertEqual(trie.total_count(), 3)
def main():
    """Part 1 demo: print 100 distinct random words from Alice in Wonderland."""
    root = Trie.TrieNode()  # Create the root node
    root.build()
    # Sample until 100 unique random words have been gathered.
    while len(Trie.randomWord) < 100:
        root.randomLookUp()
    os.system("cls")
    print(
        "This is Part 1: 100 random words from Alice in Wonderland, please notice that there are no repeat words.\n"
    )
    root.printRandom()
    print("\n\n")
def __init__(self, path=None):
    """Open (or create) the blockchain database and its on-disk trie index.

    path: sqlite database location; defaults to ./BlockChain.sqlite3.
    Fixes: the trie file handle is now context-managed, and the handle
    returned by the creating 'xb' open is closed instead of leaked.
    """
    self.path = path
    self.lock = Lock()
    if path is None:
        self.path = p.curdir + "/BlockChain.sqlite3"
        self.db = sqlite3.connect(self.path)
    else:
        self.db = sqlite3.connect(path)
    if os.path.exists("./trie"):
        # Rebuild the in-memory trie from fixed-width 11-byte ids.
        self.trie = Trie.Trie('$')
        with io.open("./trie", 'rb') as file:
            id = file.read(11)
            while len(id) == 11:
                self.trie.insert(id.decode())
                id = file.read(11)
    else:
        self.db.cursor().execute("drop table if exists Blocks")
        # 'xb' creates the file (failing if it exists); close the handle
        # immediately — the original leaked it.
        io.open("./trie", 'xb').close()
        self.trie = Trie.Trie('$')
    self.tip2 = []
    self.init_db()
    self.db.close()
def lexicon_from_file(lexicon_filename):
    '''takes a file of words and builds Trie from it.

    Words containing a 'q' not followed by 'u' are skipped, and the
    digraph "qu" is collapsed to a single 'q'.
    Note: each word in the list should end with a newline character
    (the trailing character is stripped via line[:-1]).

    Args:
        lexicon_filename (str)
    Returns:
        lexicon (Trie)'''
    lexicon = Trie.Trie()
    qu_pattern = re.compile('qu')
    # Context manager closes the file; the original leaked the handle.
    with open(lexicon_filename, 'r') as f:
        for line in f:
            if re.match('q[^u]', line):
                continue
            lexicon.add_word(qu_pattern.sub('q', line[:-1]))
    return lexicon
def creatreversetree():
    """Build a Trie of reversed dictionary words (for suffix search).

    Bug fix: the original used words[::1], which is a plain copy; [::-1]
    actually reverses the word, which is the point of a reverse tree.
    The file is also context-managed now.
    """
    dictlist = []
    with open("dict.txt", "r") as f1:
        for words in f1:
            if duplicate(words) == 1:
                dictlist.append(words.strip()[::-1])
    research = Trie.Trie()
    for word in dictlist:
        research.insert(word)
    return research
def construct(self, items):
    """Build the underlying trie over *items* and hash-index its subtrees.

    NOTE(review): looks like an x-fast-trie-style layout — hashing starts
    at depth sqrt_log_u because epsilon (the root path) was already
    added; confirm against Trie.Tree and _construct_hash_table.
    """
    trie = Trie.Tree(self.w)
    trie.extend(items)
    self.root = trie.root
    # Register the root as its own branch node before descending.
    self._add_branch_node(self.root, self.root)
    # start at next_depth = self.sqrt_log_u, because epsilon was already added
    self._construct_hash_table(self.root, self.root.left, next_depth=self.sqrt_log_u)
    self._construct_hash_table(self.root, self.root.right, next_depth=self.sqrt_log_u)
def __init__(self, document, dictionary_path=None):
    """Piece-table editor state with a spell-check dictionary.

    document: initial text buffer.
    dictionary_path: path to a .dic word file. Defaults to the previous
        hard-coded per-user Windows spelling dictionary so existing
        callers keep working; pass an explicit path on other machines.
    """
    self.textLength = len(document)
    self.original = document
    self.buffer = ""
    # The whole original document starts as a single piece.
    self.pieces = [Piece(False, 0, len(document))]
    self.dictionary = Trie.Trie()
    # On windows, the dictionary can often be found at:
    # C:/Users/{username}/AppData/Roaming/Microsoft/Spelling/en-US/default.dic
    if dictionary_path is None:
        dictionary_path = (
            "C:/Users/kirti/AppData/Roaming/Microsoft/Spelling/en-US/default.dic"
        )
    with open(dictionary_path) as input_dictionary:
        for line in input_dictionary:
            words = line.strip().split(" ")
            for word in words:
                self.dictionary.insert(word.lower())
    self.pasteText = ""
def __init__(self):
    # Word store backing the autocomplete suggestions.
    self.trie = Trie()
    self.initTrie()
    # GUI view; textEntered fires whenever the input field changes.
    self.view = TrieDemoView()
    self.view.textEntered.connect(self.handleTextChanged)
# Ben Reynolds 13309656
# Task2 (Test) - Assignment 1
from LexicalAnalyzier import *
from Trie import *

# NOTE(review): this rebinds the class name to its instance, so a second
# Lexer can never be constructed afterwards; kept as-is to preserve the
# module-level name, but worth renaming.
Lexer = Lexer()
Lexer.driver()
#Lexer.printTrie()

trie = Trie()
# Register the analyzer's reserved words in the trie.
for keyword in ("private", "public", "protected", "static",
                "primary", "integer", "exception", "try"):
    trie.proccessWord(keyword)