Beispiel #1
0
def get_rhymes(word):
    '''
    Look up rhymes for ``word`` through the Datamuse API.

    Issues a ``rel_rhy`` query limited to 5 results and returns the
    ``"word"`` field of every result, in the order the API returned them.
    '''
    client = datamuse.Datamuse()
    matches = client.words(rel_rhy=word, max=5)
    rhymes = []
    for entry in matches:
        rhymes.append(entry["word"])
    return rhymes
Beispiel #2
0
 def get_results(self, query, max_results=20):
     """Run a Datamuse spelling query, optionally constrained by a meaning.

     ``query`` is either ``"pattern"`` or ``"pattern:meaning"``; only the
     first ``:`` separates the two parts. The pattern is sent as ``sp`` and
     the meaning (or ``None`` when absent) as ``ml``.

     Returns a 3-tuple ``(words, flag, count)``:
       * fewer than ``max_results`` results -> ``(words, False, len(results))``
       * otherwise -> ``(words, True, None)``
     (the flag presumably signals that more results may exist -- confirm
     against the caller).
     """
     client = datamuse.Datamuse(max_results=max_results)
     pattern, separator, meaning = query.partition(":")
     if not separator:
         # No ':' in the query: search by spelling only.
         meaning = None
     matches = client.words(sp=pattern, ml=meaning)
     count = len(matches)
     found = [entry['word'] for entry in matches]
     if count >= max_results:
         return found, True, None
     return found, False, count
def get_related_tags(main_tags):
    '''
    Collect words related to the given tags using the Datamuse API.

    For every tag in ``main_tags`` an ``ml`` query limited to 10 results is
    issued and the ``'word'`` field of each result is collected (so up to
    10 words per tag, not 10 overall). Duplicates are removed while keeping
    first-seen order, which makes the returned list deterministic for a
    given sequence of API responses.

    Prints the number of unique tags and returns them as a list.
    '''
    api = datamuse.Datamuse()
    all_tags = []
    for tag in main_tags:
        for word in api.words(ml=tag, max=10):
            all_tags.append(word.get('word'))

    # Remove duplicates; dict keys keep insertion order, unlike a set whose
    # iteration order for strings can change from one run to the next.
    unique_tags = list(dict.fromkeys(all_tags))
    print(f'Number of tags: {len(unique_tags)}')
    return unique_tags
Beispiel #4
0
    'par',
    'bga',
    'bgb',
    'rhy',
    'nry',
    'hom',
    'cns',
]

# First element of every entry in PartOfSpeech.part_choices
# (presumably the part-of-speech codes of (code, label) pairs -- TODO confirm).
possible_parts_of_speech = [choice[0] for choice in PartOfSpeech.part_choices]

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Shared DataMuse instance for making queries through python-datamuse;
# created once when this module is imported
api = datamuse.Datamuse()


class DatamuseWordNotRecognizedError(Exception):
    """Exception indicating a Datamuse query failed because the word does not exist in the Datamuse data.

    Should be raised when a query to find an exact word by its spelling (using the Datamuse 'sp' parameter) returns no
    results, not when a query to find related words fails.

    Attributes:
        word_string: the word that was not recognized.
        message: human-readable description; also what ``str(exc)`` returns.
    """
    def __init__(self, word_string):
        # Forward only the constructor argument so ``args`` keeps mirroring the signature
        # (copying/pickling re-creates the exception from ``args``).
        super().__init__(word_string)
        self.word_string = word_string
        self.message = f'word "{word_string}" was not recognized by Datamuse'

    def __str__(self):
        # Without this, str(exc) and tracebacks would show only the bare word.
        return self.message


def query_with_retry(retries: int, wait: float, **kwargs):
    """Datamuse query with kwargs, retry a certain number of times (wait a number of seconds in between) on failure."""
    for i in range(0, retries):
Beispiel #5
0
def get_synonyms(query: str):
    """Return the first ten words Datamuse yields for an ``ml`` lookup of ``query``."""
    client = datamuse.Datamuse()
    matches = client.words(ml=query)
    words = [entry["word"] for entry in matches]
    return words[:10]
Beispiel #6
0
def login():
    """Serve the home page or one page of search results for the current request.

    Reads ``keywords``, ``page_no`` and ``origin`` from the request query
    string and the logged-in user (if any) from the beaker session. The
    request URL is stored in the module-level ``cache``.

    * No keywords: renders the ``home`` template. For a logged-in user with
      recorded keywords, the (keyword, count) pairs from ``globalKeywords``
      are passed along ordered from highest to lowest count.
    * Keywords: for a logged-in user, adds the number of occurrences of each
      distinct lower-cased keyword to that user's counts; unless ``origin``
      is set, spell-corrects the query through Datamuse (whole phrase first,
      then word by word); fetches results with ``GetResults`` and renders
      the ``search`` template with a 7-results-per-page slice and a window
      of up to five pagination links.

    A missing or non-numeric ``page_no`` is treated as page 1.

    Returns:
        The value produced by ``template(...)`` for the selected page.
    """
    global cache
    cache = request.url
    s = request.environ.get('beaker.session')
    if 'user' in s:
        email = s['user']
        loggin = 1
        if email not in globalKeywords:
            globalKeywords[email] = {}
    else:
        loggin = 0

    keywords = request.query.get('keywords')
    page_no = request.query.get('page_no')
    origin = request.query.get('origin')
    localKeywords = []
    localCount = []
    # If we dont receive any input keywords, return to home page

    if not keywords:
        popularKeywords = None
        if loggin:
            if globalKeywords[email]:
                # Sort the dict into a list of tuples from small to large (according to value, not key)
                sortedGlobalKywds = sorted(globalKeywords[email].items(),
                                           key=operator.itemgetter(1))
                # Reverse the list to start from large to small (according to value, not key)
                popularKeywords = sortedGlobalKywds[::-1]
        response.set_header('Cache-Control',
                            'no-cache, no-store, max-age=0, must-revalidate')
        return template('home',
                        popularKeywords=popularKeywords,
                        loggin=loggin,
                        userInfo=s,
                        root=ROOT)
    else:
        lowerCase = keywords.lower().split()
        # Loop through every keyword in keywords string (lower cased)
        for word in lowerCase:
            if word not in localKeywords:
                # First store the appearance count of each distinct word in the input string
                wordCount = lowerCase.count(word)
                localKeywords.append(word)
                localCount.append(wordCount)
                # Then add that count to the user's running total. This is done once per
                # distinct word (wordCount already covers every repetition) and looks the
                # word up in the user's own dict, not in the e-mail-keyed outer dict.
                if loggin:
                    userKeywords = globalKeywords[email]
                    userKeywords[word] = userKeywords.get(word, 0) + wordCount

        keywords_suggested = keywords
        if not origin:
            # spell correction check
            try:
                api = datamuse.Datamuse()
                query = api.suggest(s=keywords, max=10)
                query_score = int(query[0]['score'])
                query_sug = str(query[0]['word'])
                print(query_sug)
                print(query_score)
                if query_score < 1000:
                    raise ValueError('score too low')
                keywords_suggested = query_sug
            except (IndexError, ValueError):
                # spell correct each word if full sentence does not have valid predict
                corrected = []
                for x in lowerCase:
                    # One request per word; keep the word unchanged when there is no
                    # suggestion at all or the best one does not score above 1000.
                    suggestions = api.suggest(s=x, max=10)
                    if suggestions and int(suggestions[0]['score']) > 1000:
                        corrected.append(str(suggestions[0]['word']))
                    else:
                        corrected.append(x)
                lowerCase = corrected
                keywords_suggested = " ".join(lowerCase)
                print(keywords_suggested)

        urls = GetResults(keywords_suggested)
        if urls is not None:
            try:
                page = int(page_no)
            except (TypeError, ValueError):
                # page_no missing or not a number: fall back to the first page
                page = 1
            per_page = 7
            search = urls[(page * per_page - per_page):(page * per_page)]
            # Number of pages, rounded up (floor division keeps this an int on Python 3 too)
            total = len(urls) // per_page
            if len(urls) % per_page != 0:
                total = total + 1

            # Choose the half-open window [i, j) of zero-based page indexes to link to
            if total < 5:
                i = 0
                j = total
            elif page - 3 <= 0:
                i = 0
                j = 5
            elif total - page <= 2:
                i = total - 5
                j = total
            else:
                i = page - 3
                j = page + 2
            # Compare page numbers by value; identity checks on ints are unreliable
            pagination = [('active' if (n + 1) == page else 'None', n + 1)
                          for n in range(i, j)]
            print(pagination)
            #response.set_header('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
            return template('search',
                            loggin=loggin,
                            userInfo=s,
                            pgn=pagination,
                            srch=search,
                            keywords=keywords,
                            key_sug=keywords_suggested,
                            currentpage=page,
                            maxpage=total)
        else:
            #response.set_header('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
            return template('search',
                            loggin=loggin,
                            userInfo=s,
                            pgn=[],
                            srch=[],
                            keywords=keywords,
                            key_sug=keywords_suggested,
                            currentpage=1,
                            maxpage=1)
        '''