def __preprocess_search_query(self, query):
    """Turn a search query into the list of tokens to search with.

    The query is tokenized through ``AnalyzeContent``. Each token is then
    spell-corrected (only tokens of three or more characters that the spell
    checker reports as unknown), replaced by the top ``autocomplete``
    prediction when there is one, and lemmatized. Stopwords are dropped
    unless every token is a stopword.

    Args:
        query: The query text.

    Returns:
        The processed tokens, in query order.
    """
    # Create tokens.
    consumed = AnalyzeContent(text_weights=[(query, 1)])
    search_tokens = consumed.content_token_list()

    for i, token in enumerate(search_tokens):
        # Attempt to spellcheck invalid tokens.
        if len(token) >= 3 and self._spellcheck.unknown([token]):
            corrected = self._spellcheck.correction(token)
            # The spell checker may have no suggestion at all; keep the token
            # as given rather than feeding a missing value to the steps below.
            if corrected is not None:
                search_tokens[i] = corrected
                token = corrected

        # Use the previous (already processed) token as context when the
        # autocomplete model knows it; otherwise complete the token alone.
        if i > 0 and autocomplete.predict_currword(search_tokens[i - 1]):
            pred = autocomplete.predict(search_tokens[i - 1], token)
        else:
            pred = autocomplete.predict_currword(token)

        best_pred = pred[0][0] if len(pred) > 0 else token
        search_tokens[i] = self._lemmatizer.lemmatize(best_pred)

    # Remove stopwords if we have non-stopword terms; otherwise search purely
    # by stopwords as a last resort.
    non_stopwords = [x for x in search_tokens if x not in self._stopwords]
    return non_stopwords if non_stopwords else search_tokens
def predict(statement=None):
    """Print and return the two best completions for the last word typed.

    ``statement`` (a string or an iterable of string pieces) is joined and
    split on single spaces. With more than one word, the last word is
    completed in the context of the word before it; otherwise it is
    completed on its own. The two best predictions are passed through
    ``extract``.

    Args:
        statement: The text typed so far. Defaults to empty text.

    Returns:
        ``[first_choice, last_word, second_choice]``, where a missing choice
        is the string ``"N/A"``. The same three values are printed joined
        by ``" - "``.
    """
    # A None default replaces the shared mutable list default.
    sentence = "".join([] if statement is None else statement).split(" ")

    # Run the prediction once instead of once per value read from it.
    if len(sentence) > 1:
        predictions = autocomplete.predict_currword_given_lastword(
            sentence[-2], sentence[-1])
    else:
        predictions = autocomplete.predict_currword(sentence[-1])

    one = extract(predictions[0]) if len(predictions) >= 1 else None
    two = extract(predictions[1]) if len(predictions) >= 2 else None

    choices = [
        one if one is not None else "N/A",
        sentence[-1],
        two if two is not None else "N/A",
    ]
    # Parenthesised single argument prints identically on Python 2 and 3.
    print(" - ".join(choices))
    return choices
def __init__(self, word, prev_word, num_choices):
    """Build the word tree from autocomplete predictions for ``word``.

    Args:
        word: The (partial) word to complete.
        prev_word: The preceding word, or None when there is none. When
            given, both the context-free and the contextual predictions are
            requested and the longer list is used (the contextual list wins
            ties).
        num_choices: How many predictions to request.
    """
    # The original passed the undefined name ``num_choice`` here, which
    # raised NameError on every call.
    if prev_word is None:
        word_list = autocomplete.predict_currword(word, num_choices)
    else:
        by_word = autocomplete.predict_currword(word, num_choices)
        by_context = autocomplete.predict_currword_given_lastword(
            prev_word, word, num_choices)
        word_list = by_word if len(by_word) > len(by_context) else by_context

    self.w_size = len(word_list)

    # Re-key the entries so they alternate around the middle of the list.
    # ``**`` replaces ``^``: in Python ``^`` is bitwise XOR, so the original
    # expression never produced the alternating +1/-1 sign.
    # NOTE(review): the range stops one short of the last entry, as in the
    # original -- confirm that is intended.
    # NOTE(review): this assigns into each entry in place, so entries must be
    # mutable sequences; verify what the autocomplete calls return.
    for i in range(0, len(word_list) - 1):
        word_list[i][1] = (len(word_list) / 2 - 1) + ((-1) ** (i + 1)) * i

    self.make_wordTree(word_list)
def get_ac_list(str, count):
    """Return up to ``count`` auto-complete suggestions for the typed text.

    Args:
        str: The text typed so far (the name shadows the builtin but is kept
            so existing keyword callers keep working).
        count: Number of suggestions to request from the library.

    Returns:
        A list of suggestion strings. For two words, each suggestion is the
        first word, a space, and a predicted second word. The list is empty
        for blank input, for more than two words, or when the two-word
        prediction fails.
    """
    words = str.split()

    # Note that the auto-complete library only supports up to two full words.
    if len(words) > 2:
        return []

    if len(words) > 1:
        try:
            predictions = autocomplete.predict(words[0], words[1], count)
            return [words[0] + ' ' + name for name, num in predictions]
        except Exception:
            # Due to a bug in the auto-complete library, words that contain
            # a number (e.g. abcde10) don't get handled properly. Catching
            # Exception (not a bare except) keeps this best-effort without
            # also swallowing KeyboardInterrupt/SystemExit.
            return []

    if len(words) == 1:
        predictions = autocomplete.predict_currword(words[0], count)
        return [name for name, num in predictions]

    return []
def display():
    """Predict completions for the global ``curr_word`` and display them.

    Requests up to 1000 predictions for ``curr_word``, reduces them with
    ``calculate_frequency`` and hands the result to ``ignite``.
    """
    global curr_word
    predictions = autocomplete.predict_currword(curr_word, top_n=1000)
    ignite(calculate_frequency(predictions))
def autocomplete_word(text):
    """Return the top autocomplete prediction for ``text``.

    Args:
        text: The (partial) word to complete.

    Returns:
        ``(True, word)`` on success, or
        ``(False, "Can not predict word")`` when loading the models or
        predicting fails, including when there is no prediction at all.
    """
    try:
        autocomplete.load()
        predictions = autocomplete.predict_currword(text)
        # The top entry is stringified as a whole, then everything that is
        # not a word character, and every run of digits, is blanked out.
        top = re.sub(r'[^\w]', ' ', str(predictions[0]))
        top = re.sub(r'\d+', ' ', top)
        return True, top.strip()
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` that
        # would also swallow KeyboardInterrupt/SystemExit.
        return False, "Can not predict word"
async def completing(letters):
    """Train on all saved user events, then predict completions of ``letters``.

    Every record returned by ``Complete.fetch_all_saved_user_events()``
    contributes its ``'event'`` value to one training text, which is passed
    to ``models.train_models`` before predicting.

    Args:
        letters: The (partial) word to complete.

    Returns:
        Whatever ``autocomplete.predict_currword(letters)`` returns.
    """
    events = await Complete.fetch_all_saved_user_events()
    # One join instead of repeated concatenation. The text is unchanged: a
    # single leading space, then every event preceded by a space.
    text = ' ' + ''.join(' ' + str(record['event']) for record in events)
    models.train_models(text)
    return autocomplete.predict_currword(letters)
def predict_word(text, max_suggestions=10):
    """Suggest completions for the last word of ``text``.

    Text without a space is completed on its own; otherwise the last word
    is completed in the context of the word before it. Input is lower-cased
    before prediction.

    Args:
        text: The text typed so far.
        max_suggestions: Maximum number of suggestions to request.

    Returns:
        The first element of each prediction, in the order predicted.
    """
    lowered = text.lower()
    if " " in text:
        words = lowered.split(" ")
        predictions = autocomplete.predict_currword_given_lastword(
            words[-2], words[-1], top_n=max_suggestions)
    else:
        predictions = autocomplete.predict_currword(
            lowered, top_n=max_suggestions)
    return [entry[0] for entry in predictions]
def predictCity(self, string, threshold, newLocationDir, export=True):
    """Find the known city name that best fuzzy-matches ``string``.

    Each tag from ``self.getFuzzTags(string)`` is cut to its first four
    characters and completed with ``autocomplete``. Every predicted word is
    looked up in ``self.cityNames`` and the resulting candidates are scored
    against ``string`` with ``fuzz.token_sort_ratio``.

    Args:
        string: The text to match.
        threshold: Minimum score for a candidate to be accepted.
        newLocationDir: Directory holding ``no_match.txt``.
        export: When exactly ``True``, rejected inputs are appended to
            ``no_match.txt`` as ``score,match,"string"`` lines.

    Returns:
        ``None`` for empty input, ``False`` when no candidate scored above
        zero, the best candidate when its score reaches ``threshold``, and
        ``True`` when the best candidate scored below ``threshold``.
    """
    if not string:
        return None

    match = None
    score = 0
    autoCompleteCache = {}
    for word in self.getFuzzTags(string):
        searchWord = word[:4].strip().lower()
        # Predict once per distinct prefix, including prefixes that yielded
        # no predictions.
        if searchWord not in autoCompleteCache:
            autoCompleteCache[searchWord] = autocomplete.predict_currword(
                searchWord, top_n=10)
        for predictedWord in autoCompleteCache[searchWord]:
            predictedCityNames = self.cityNames[predictedWord[0]]
            if not predictedCityNames:
                continue
            for candidate in predictedCityNames:
                newScore = fuzz.token_sort_ratio(string, candidate)
                if newScore > score:
                    score = newScore
                    match = candidate

    # The log is still opened up front (so the file is created exactly as
    # before), but it is now closed on every path; previously the handle
    # leaked whenever a match was accepted.
    log = None
    if export is True:
        log = open(os.path.join(newLocationDir, 'no_match.txt'), 'a+')
    try:
        if match is None:
            if log is not None:
                log.write('{0},{1},"{2}"\n'.format(
                    str(score), str(match), string))
            return False  # No match
        if score >= threshold:
            # Parenthesised single argument prints the same on Python 2 and 3.
            print('match:' + str(score) + '---' + str(match) + '---' + string)
            return match
        if log is not None:
            log.write('{0},"{1}","{2}"\n'.format(
                str(score), str(match), string))
        return True  # Not close enough match
    finally:
        if log is not None:
            log.close()
def predictCity(self, string, threshold, newLocationDir, export=True):
    """Find the known city name that best fuzzy-matches ``string``.

    Each tag from ``self.getFuzzTags(string)`` is cut to its first four
    characters and completed with ``autocomplete``. Every predicted word is
    looked up in ``self.cityNames`` and the resulting candidates are scored
    against ``string`` with ``fuzz.token_sort_ratio``.

    Args:
        string: The text to match.
        threshold: Minimum score for a candidate to be accepted.
        newLocationDir: Directory holding ``no_match.txt``.
        export: When exactly ``True``, rejected inputs are appended to
            ``no_match.txt`` as ``score,match,"string"`` lines.

    Returns:
        ``None`` for empty input, ``False`` when no candidate scored above
        zero, the best candidate when its score reaches ``threshold``, and
        ``True`` when the best candidate scored below ``threshold``.
    """
    if not string:
        return None

    match = None
    score = 0
    autoCompleteCache = {}
    for word in self.getFuzzTags(string):
        searchWord = word[:4].strip().lower()
        # Predict once per distinct prefix, including prefixes that yielded
        # no predictions.
        if searchWord not in autoCompleteCache:
            autoCompleteCache[searchWord] = autocomplete.predict_currword(
                searchWord, top_n=10)
        for predictedWord in autoCompleteCache[searchWord]:
            predictedCityNames = self.cityNames[predictedWord[0]]
            if not predictedCityNames:
                continue
            for candidate in predictedCityNames:
                newScore = fuzz.token_sort_ratio(string, candidate)
                if newScore > score:
                    score = newScore
                    match = candidate

    # The log is still opened up front (so the file is created exactly as
    # before), but it is now closed on every path; previously the handle
    # leaked whenever a match was accepted.
    log = None
    if export is True:
        log = open(os.path.join(newLocationDir, 'no_match.txt'), 'a+')
    try:
        if match is None:
            if log is not None:
                log.write('{0},{1},"{2}"\n'.format(
                    str(score), str(match), string))
            return False  # No match
        if score >= threshold:
            # Parenthesised single argument prints the same on Python 2 and 3.
            print('match:' + str(score) + '---' + str(match) + '---' + string)
            return match
        if log is not None:
            log.write('{0},"{1}","{2}"\n'.format(
                str(score), str(match), string))
        return True  # Not close enough match
    finally:
        if log is not None:
            log.close()
def predict_character(text, max_suggestions=27):
    """Rank the characters most likely to follow ``text``.

    Word predictions are requested for the last word of ``text`` (in the
    context of the preceding word when ``text`` contains a space). Each
    prediction votes, with its second element as weight, for the character
    it has right after the typed prefix. A prediction with nothing after
    the prefix votes for a space.

    Args:
        text: The text typed so far.
        max_suggestions: Maximum number of word predictions to request.

    Returns:
        The distinct next characters, ordered by descending total weight.
        Characters with equal totals keep the order in which they first
        appeared among the predictions.
    """
    if " " not in text:
        prefix_len = len(text)
        results = autocomplete.predict_currword(
            text.lower(), top_n=max_suggestions)
    else:
        words = text.lower().split(" ")
        prefix_len = len(words[-1])
        results = autocomplete.predict_currword_given_lastword(
            words[-2], words[-1], top_n=max_suggestions)

    # Accumulate the weights in one pass instead of re-summing over all
    # predictions for every prediction. Totals and first-seen key order are
    # the same as before.
    character_dict = {}
    for entry in results:
        word = entry[0]
        next_char = word[prefix_len] if word[prefix_len:] != "" else " "
        character_dict[next_char] = character_dict.get(next_char, 0) + entry[1]

    return sorted(character_dict, key=character_dict.__getitem__, reverse=True)