import datamuse


def get_rhymes(word):
    '''
    Given a word, queries a rhymes database to get a list of 5 words
    with similar endings.
    '''
    api = datamuse.Datamuse()
    # rel_rhy asks the Datamuse API for perfect rhymes; max caps the count.
    response = api.words(rel_rhy=word, max=5)
    # Pull the "word" field from each result (avoids shadowing built-in dict).
    return [result["word"] for result in response]
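A quick usage sketch (the input word is hypothetical; the actual rhymes returned depend on the live Datamuse data):

if __name__ == "__main__":
    # Prints up to five rhymes for "cat", e.g. words like "hat" or "bat".
    print(get_rhymes("cat"))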
def get_results(self, query, max_results=20):
    api = datamuse.Datamuse(max_results=max_results)
    # A query of the form "spelling:meaning" constrains both the Datamuse
    # spelled-like (sp) and means-like (ml) parameters.
    if ':' in query:
        sp, meaning = query.split(":", 1)
    else:
        sp, meaning = query, None
    # requests drops None-valued parameters, so ml is simply omitted
    # when no meaning was given.
    words = api.words(sp=sp, ml=meaning)
    if len(words) < max_results:
        # Fewer hits than the cap: we know the exact total.
        return [x['word'] for x in words], False, len(words)
    # Hit the cap: there may be more matches than were returned.
    return [x['word'] for x in words], True, None
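A hypothetical driver for the method above. Since `self` is never used in the body, it can be exercised directly with `None` (the enclosing class is not part of the snippet), and the three-tuple return unpacked:

words, truncated, count = get_results(None, "cro??:bird", max_results=5)
if truncated:
    # Hit the cap: Datamuse may have had more matches than were returned.
    print(f"first {len(words)} matches (possibly more): {words}")
else:
    print(f"all {count} matches: {words}")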
def get_related_tags(main_tags):
    '''
    Returns the top 10 related words for each tag according to the
    Datamuse API, deduplicated across all tags.
    '''
    api = datamuse.Datamuse()
    all_tags = []
    for tag in main_tags:
        # ml= asks Datamuse for words related in meaning to the tag.
        words = api.words(ml=tag, max=10)
        for word in words:
            all_tags.append(word.get('word'))
    # Remove duplicates from list (note: set() does not preserve order)
    unique_tags = list(set(all_tags))
    print(f'Number of tags: {len(unique_tags)}')
    return unique_tags
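A sketch of calling it with two seed tags (hypothetical inputs; output order varies because `set()` discards ordering):

# Up to 20 words are fetched (10 per seed tag); duplicates are collapsed.
related = get_related_tags(['python', 'snake'])
print(related)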
    'par',
    'bga',
    'bgb',
    'rhy',
    'nry',
    'hom',
    'cns',
]

possible_parts_of_speech = [choice[0] for choice in PartOfSpeech.part_choices]

# Get an instance of a logger
logger = logging.getLogger(__name__)

# Get a DataMuse instance for making queries through python-datamuse
api = datamuse.Datamuse()


class DatamuseWordNotRecognizedError(Exception):
    """Exception indicating a Datamuse query failed because the word does
    not exist in the Datamuse data.

    Should be raised when a query to find an exact word by its spelling
    (using the Datamuse 'sp' parameter) returns no results, not when a
    query to find related words fails.
    """

    def __init__(self, word_string):
        self.word_string = word_string
        self.message = f'word "{word_string}" was not recognized by Datamuse'


def query_with_retry(retries: int, wait: float, **kwargs):
    """Datamuse query with kwargs, retry a certain number of times
    (wait a number of seconds in between) on failure."""
    for i in range(0, retries):
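The retry loop is cut off in the excerpt above. A minimal completion consistent with the docstring might look like this (assuming `time` is imported alongside `logging` and `datamuse` at the top of the module):

def query_with_retry(retries: int, wait: float, **kwargs):
    """Datamuse query with kwargs, retry a certain number of times
    (wait a number of seconds in between) on failure."""
    for i in range(0, retries):
        try:
            return api.words(**kwargs)
        except Exception:
            # Out of attempts: let the last error propagate to the caller.
            if i == retries - 1:
                raise
            logger.warning('Datamuse query failed, retrying in %s s', wait)
            time.sleep(wait)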
def get_synonyms(query: str):
    dm = datamuse.Datamuse()
    return [res["word"] for res in dm.words(ml=query)][:10]
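Because the slice happens client-side, the call still pulls Datamuse's default result count over the wire before discarding most of it. A variant that caps the response server-side instead (a sketch; results are identical for the top 10 hits since Datamuse orders by score):

def get_synonyms_capped(query: str):
    # max=10 asks Datamuse itself to cap the response at 10 results,
    # avoiding the larger default payload that the slice above discards.
    dm = datamuse.Datamuse()
    return [res["word"] for res in dm.words(ml=query, max=10)]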
def login():
    global cache
    cache = request.url
    s = request.environ.get('beaker.session')
    if 'user' in s:
        email = s['user']
        loggin = 1
        if email not in globalKeywords:
            globalKeywords[email] = {}
    else:
        loggin = 0
    keywords = request.query.get('keywords')
    page_no = request.query.get('page_no')
    origin = request.query.get('origin')
    localKeywords = []
    localCount = []
    # If we don't receive any input keywords, return to the home page
    if not keywords:
        popularKeywords = None
        if loggin:
            if globalKeywords[email]:
                # Sort the dict into a list of tuples, ascending by value (not key)
                sortedGlobalKywds = sorted(globalKeywords[email].items(), key=operator.itemgetter(1))
                # Reverse the list so it runs from largest to smallest value
                popularKeywords = sortedGlobalKywds[::-1]
        response.set_header('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
        return template('home', popularKeywords=popularKeywords, loggin=loggin, userInfo=s, root=ROOT)
    else:
        lowerCase = keywords.lower().split()
        # Loop through every keyword in the (lower-cased) keywords string
        for word in lowerCase:
            if word not in localKeywords:
                # First store the number of appearances of each word in the input string
                wordCount = lowerCase.count(word)
                localKeywords.append(word)
                localCount.append(wordCount)
                # Then update the word count for every keyword to determine the 20 most popular
                if loggin:
                    if word in globalKeywords[email]:
                        globalKeywords[email][word] = globalKeywords[email][word] + wordCount
                    else:
                        globalKeywords[email][word] = wordCount
        keywords_suggested = keywords
        if not origin:
            # Spell-correction check
            try:
                api = datamuse.Datamuse()
                query = api.suggest(s=keywords, max=10)
                query_score = int(query[0]['score'])
                query_sug = str(query[0]['word'])
                print(query_sug)
                print(query_score)
                if query_score < 1000:
                    raise ValueError('score too low')
                keywords_suggested = query_sug
            except (IndexError, ValueError):
                # Spell-correct each word if the full sentence has no valid prediction
                lowerCase = [
                    str(api.suggest(s=x, max=10)[0]['word'])
                    if int(api.suggest(s=x, max=10)[0]['score']) > 1000 else x
                    for x in lowerCase
                ]
                keywords_suggested = " ".join(lowerCase)
                print(keywords_suggested)
        urls = GetResults(keywords_suggested)
        if urls is not None:
            # Seven results per page; slice out the current page's window.
            search = urls[(int(page_no) * 7 - 7):(int(page_no) * 7)]
            total = len(urls) // 7
            if len(urls) % 7 != 0:
                total = total + 1
            if total < 5:
                i = 0
                j = total
                pagination = [None] * total
            elif int(page_no) - 3 <= 0:
                i = 0
                j = 5
                pagination = [None] * 5
            elif total - int(page_no) <= 2:
                i = total - 5
                j = total
                pagination = [None] * 5
            else:
                i = int(page_no) - 3
                j = int(page_no) + 2
                pagination = [None] * 5
            for n in range(i, j):
                if (n + 1) == int(page_no):
                    pagination[n - i] = ('active', n + 1)
                else:
                    pagination[n - i] = ('None', n + 1)
            print(pagination)
            #response.set_header('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
            return template('search', loggin=loggin, userInfo=s, pgn=pagination,
                            srch=search, keywords=keywords, key_sug=keywords_suggested,
                            currentpage=int(page_no), maxpage=total)
        else:
            #response.set_header('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
            return template('search', loggin=loggin, userInfo=s, pgn=[], srch=[],
                            keywords=keywords, key_sug=keywords_suggested,
                            currentpage=1, maxpage=1)
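The pagination windowing is easier to see in isolation. A minimal sketch (the `page_window` helper is hypothetical, mirroring the 5-page window and the branching in the handler above):

def page_window(page_no: int, total: int, width: int = 5):
    """Return (start, end) page indices for the pagination bar."""
    if total < width:
        return 0, total
    if page_no - 3 <= 0:              # near the first page: pin window left
        return 0, width
    if total - page_no <= 2:          # near the last page: pin window right
        return total - width, total
    return page_no - 3, page_no + 2   # otherwise centre on the current page

# e.g. with 12 pages: page 1 -> (0, 5), page 6 -> (3, 8), page 12 -> (7, 12)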