Example #1
0
def query_thesaurus_com(single_word):
    """
    This function queries thesaurus.com for antonyms
    related to the 'single_word' parameter.

    The word is validated first and the antonym cache is consulted next;
    the remote endpoint is only requested on a cache miss. Antonyms fetched
    from the site are sorted and written to the cache before being returned.

    :param single_word: string variable to search for
    :return: list of antonyms; None when the word is not valid, the site
        has no entry or no antonyms for it, or a requests error was logged
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if not valid_word:
        logger.error(f'The word {single_word} was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')
        return None

    check_cache = caching.cache_antonyms(single_word, 'thesaurus_com')
    if check_cache:
        # Cache hit: return the stored values without touching the network.
        return cleansing.flatten_multidimensional_list(
            [val for val in check_cache.values()])

    try:
        req = requests.get(
            f'https://tuna.thesaurus.com/pageData/{single_word}',
            headers=basic_soup.http_headers,
            allow_redirects=True,
            verify=True,
            timeout=30)

        # A body containing '{"data":null}' is treated as "word not found".
        if '{"data":null}' in req.text:
            logger.error(
                f'The word {single_word} was not found on thesaurus.com.'
            )
            return None

        # Only the first definition entry is inspected.
        dict_antonyms = req.json(
        )['data']['definitionData']['definitions'][0]['antonyms']
        if not dict_antonyms:
            logger.error(
                f'The word {single_word} has no antonyms on thesaurus.com.'
            )
            return None

        antonyms = sorted([r["term"] for r in dict_antonyms])
        caching.insert_word_cache_antonyms(
            single_word, 'thesaurus_com', antonyms)
        return antonyms
    except requests.HTTPError as e:
        logger.error('A HTTP error has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.ConnectionError as e:
        # Previously the message was a bare string expression (never logged)
        # and the traceback was guarded by `requests.codes`, which has no
        # relation to the caught error. Both are now logged unconditionally.
        logger.error('Failed to establish a new connection')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.Timeout as e:
        logger.error('A connection timeout has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.RequestException as e:
        logger.error('An ambiguous exception occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None
Example #2
0
    def _validate_word(self):
        """
        Check the syntax of the word stored on this instance.

        The check itself is delegated to
        word_verification.validate_word_syntax.

        :return: the truthy result of the syntax check when the word passes;
            otherwise two error messages are logged and None is returned
        """
        syntax_ok = word_verification.validate_word_syntax(self._word)
        if not syntax_ok:
            logger.error(f'The word {self._word} was not in a valid format.')
            logger.error(f'Please verify that the word {self._word} is spelled correctly.')
            return None
        return syntax_ok
Example #3
0
def query_synonym_com(single_word):
    """
    Look up a definition for 'single_word' on synonym.com.

    The word is validated, then the definition cache is checked; the page is
    only fetched on a cache miss. The definition is taken from the content
    of the page's og:description meta tag.

    :param single_word: string variable to search for
    :return: definition for the word; None when the word is not valid, the
        site has no reference for it, or a parsing error was logged
    """
    if not word_verification.validate_word_syntax(single_word):
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(f'Please verify that the word {single_word} is spelled correctly.')
        return None

    cached_entry = caching.cache_definition(single_word, 'synonym_com')
    if cached_entry:
        return cleansing.flatten_multidimensional_list(list(cached_entry.values()))

    try:
        page_html = basic_soup.get_single_page_html(f'https://www.synonym.com/synonyms/{single_word}')
        parsed_page = BeautifulSoup(page_html, "lxml")
        meta_description = parsed_page.find("meta", property="og:description")

        if 'find any words based on your search' in meta_description['content']:
            logger.error(f'synonym.com had no reference for the word {single_word}')
            return None

        # The text after the first '|' holds the definition; the
        # 'definition:' label is removed and the rest is split on commas.
        segments = regex.split(r'\|', meta_description['content'])
        raw_parts = segments[1].lstrip().replace('definition:', '').split(',')
        cleaned_parts = [cleansing.normalize_space(part) for part in raw_parts]
        definition_text = ' '.join([str(part) for part in cleaned_parts])

        caching.insert_word_cache_definition(single_word, 'synonym_com', definition_text)
        return definition_text
    except (bs4.FeatureNotFound, AttributeError, KeyError, TypeError) as e:
        # One handler, with the message chosen per exception type.
        if isinstance(e, bs4.FeatureNotFound):
            headline = 'An error occurred in the following code segment:'
        elif isinstance(e, AttributeError):
            headline = 'An AttributeError occurred in the following code segment:'
        elif isinstance(e, KeyError):
            headline = 'A KeyError occurred in the following code segment:'
        else:
            headline = 'A TypeError occurred in the following code segment:'
        logger.error(headline)
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None
Example #4
0
def query_collins_dictionary_synonym(single_word):
    """
    Look up a definition for 'single_word' on collinsdictionary.com.

    The word is validated, then the definition cache is checked; the
    thesaurus page is only fetched on a cache miss.

    :param single_word: string variable to search for
    :return: definition for the word; None when the word is not valid, the
        site has no reference for it, or a parsing error was logged
    """
    if not word_verification.validate_word_syntax(single_word):
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(f'Please verify that the word {single_word} is spelled correctly.')
        return None

    cached_entry = caching.cache_definition(single_word, 'collins_dictionary')
    if cached_entry:
        return cleansing.flatten_multidimensional_list(list(cached_entry.values()))

    try:
        page_html = basic_soup.get_single_page_html(
            f'https://www.collinsdictionary.com/dictionary/english-thesaurus/{single_word}')
        container = basic_soup.query_html(
            page_html, 'div', 'class', 'form type-def titleTypeSubContainer')

        if container is None:
            logger.error(f'Collins Dictionary had no reference for the word {single_word}')
            return None

        # The definition is the next 'def' div following the container.
        definition_tag = container.findNext('div', {'class': 'def'})
        definition_text = definition_tag.text
        caching.insert_word_cache_definition(single_word, 'collins_dictionary', definition_text)
        return definition_text
    except (bs4.FeatureNotFound, AttributeError, KeyError, TypeError) as e:
        # One handler, with the message chosen per exception type.
        if isinstance(e, bs4.FeatureNotFound):
            headline = 'An error occurred in the following code segment:'
        elif isinstance(e, AttributeError):
            headline = 'An AttributeError occurred in the following code segment:'
        elif isinstance(e, KeyError):
            headline = 'A KeyError occurred in the following code segment:'
        else:
            headline = 'A TypeError occurred in the following code segment:'
        logger.error(headline)
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None
Example #5
0
def query_thesaurus_com(single_word):
    """
    This function queries thesaurus.com for a definition
    related to the 'single_word' parameter.

    The word is validated first and the definition cache is consulted next;
    the remote endpoint is only requested on a cache miss. A definition
    fetched from the site is written to the cache before being returned.

    :param single_word: string variable to search for
    :return: definition for the word; None when the word is not valid, the
        site has no reference for it, or a requests error was logged
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if not valid_word:
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(f'Please verify that the word {single_word} is spelled correctly.')
        return None

    check_cache = caching.cache_definition(single_word, 'thesaurus_com')
    if check_cache:
        # Cache hit: return the stored values without touching the network.
        return cleansing.flatten_multidimensional_list([val for val in check_cache.values()])

    try:
        req = requests.get(f'https://tuna.thesaurus.com/pageData/{single_word}',
                           headers=basic_soup.http_headers,
                           allow_redirects=True, verify=True, timeout=30)
        # Decode the response body once and reuse it for both lookups.
        payload = req.json()
        if payload['data'] is None:
            logger.error(f'thesaurus.com had no reference for the word {single_word}')
            return None

        # Only the first definition entry is used.
        definition = payload['data']['definitionData']['definitions'][0]['definition']
        caching.insert_word_cache_definition(single_word, 'thesaurus_com', definition)
        return definition
    except requests.HTTPError as e:
        logger.error('A HTTP error has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.ConnectionError as e:
        # Previously the message was a bare string expression (never logged)
        # and the traceback was guarded by `requests.codes`, which has no
        # relation to the caught error. Both are now logged unconditionally.
        logger.error('Failed to establish a new connection')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.Timeout as e:
        logger.error('A connection timeout has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.RequestException as e:
        logger.error('An ambiguous exception occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None
Example #6
0
def query_collins_dictionary_synonym(single_word):
    """
    This function queries collinsdictionary.com for synonyms
    related to the 'single_word' parameter.

    The word is validated first and the synonym cache is consulted next;
    the thesaurus page is only fetched on a cache miss. Synonyms scraped
    from the page are sorted, written to the cache and returned.

    :param single_word: string variable to search for
    :return: list of synonyms; None when the word is not valid, the site
        has no reference for it, or a parsing error was logged
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if not valid_word:
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')
        return None

    check_cache = caching.cache_synonyms(single_word, 'collins_dictionary')
    if check_cache:
        # Cache hit: return the stored values without touching the network.
        return cleansing.flatten_multidimensional_list(
            [val for val in check_cache.values()])

    try:
        synonyms = []
        results_synonym = basic_soup.get_single_page_html(
            f'https://www.collinsdictionary.com/dictionary/english-thesaurus/{single_word}'
        )
        soup = BeautifulSoup(results_synonym, 'lxml')
        # The site reports an unknown word with this exact heading text.
        word_found = soup.find(
            'h1',
            text=
            f'Sorry, no results for “{single_word}” in the English Thesaurus.'
        )
        if word_found:
            logger.error(
                f'Collins Dictionary had no reference for the word {single_word}'
            )
            logger.error(
                f'Please verify that the word {single_word} is spelled correctly.'
            )
            return None

        query_results = basic_soup.query_html(
            results_synonym, 'div', 'class', 'blockSyn')
        for item in query_results.descendants:
            # NOTE(review): the second argument to `get` is a default value,
            # not a filter, so this condition accepts every div whose class
            # lookup is truthy. Left unchanged because tightening it could
            # alter which synonyms are collected -- confirm against the
            # live page markup.
            if item.name == 'div' and item.get(
                    'class', 'form type-syn orth'):
                children = item.findChild('span', {'class': 'orth'})
                if children is not None:
                    synonyms.append(children.text)

        # Sort before caching so a later cache hit yields the same ordering
        # as this cache-miss path (the unsorted list used to be cached).
        synonyms = sorted(synonyms)
        caching.insert_word_cache_synonyms(single_word,
                                           'collins_dictionary',
                                           synonyms)
        return synonyms
    except bs4.FeatureNotFound as e:
        logger.error(
            'A BeautifulSoup error occurred in the following code segment:'
        )
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except AttributeError as e:
        logger.error(
            'An AttributeError occurred in the following code segment:'
        )
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except KeyError as e:
        logger.error(
            'A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except TypeError as e:
        logger.error(
            'A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None
Example #7
0
def query_wordnet(single_word):
    """
    This function queries wordnet for synonyms
    related to the 'single_word' parameter.

    The word is validated first and the synonym cache is consulted next;
    the WordNet web page is only requested on a cache miss. Synonyms scraped
    from the page are lower-cased, sorted, written to the cache and returned.

    :param single_word: string variable to search for
    :return: list of synonyms; None when the word is not valid, the page
        has no 'Noun' section for it, or an error was logged
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if not valid_word:
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')
        return None

    check_cache = caching.cache_synonyms(single_word, 'wordnet')
    if check_cache:
        # Cache hit: return the stored values without touching the network.
        return cleansing.flatten_multidimensional_list(
            [val for val in check_cache.values()])

    try:
        synonyms = []
        results = requests.get(
            f'http://wordnetweb.princeton.edu/perl/webwn?s={single_word}',
            headers=basic_soup.http_headers,
            allow_redirects=True,
            verify=True,
            timeout=30)
        soup = BeautifulSoup(results.text, "lxml")

        # Results are only read when the page contains a 'Noun' heading.
        if not soup.findAll('h3', text='Noun'):
            logger.error(
                f'Wordnet had no reference for the word {single_word}')
            logger.error(
                f'Please verify that the word {single_word} is spelled correctly.'
            )
            return None

        # Only the first <ul> on the page is scanned.
        parent_node = soup.findAll("ul")[0].findAll('li')
        for children in parent_node:
            for child in children.find_all(href=True):
                # Links whose first content contains 'S:' are skipped
                # (presumably the sense-marker link -- confirm).
                if 'S:' not in child.contents[0]:
                    synonyms.append(child.contents[0])
        synonyms = sorted([x.lower() for x in synonyms])
        caching.insert_word_cache_synonyms(single_word, 'wordnet',
                                           synonyms)
        return synonyms
    except IndexError as e:
        logger.error(
            'A IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.HTTPError as e:
        logger.error('A HTTP error has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.ConnectionError as e:
        # Previously the message was a bare string expression (never logged)
        # and the traceback was guarded by `requests.codes`, which has no
        # relation to the caught error. Both are now logged unconditionally.
        logger.error('Failed to establish a new connection')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.Timeout as e:
        logger.error('A connection timeout has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.RequestException as e:
        logger.error('An ambiguous exception occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None
Example #8
0
def query_thesaurus_plus(single_word):
    """
    This function queries thesaurus.plus for synonyms
    related to the 'single_word' parameter.

    The word is validated first and the synonym cache is consulted next;
    the noun-category page is only fetched on a cache miss. Synonyms scraped
    from the page are normalized, sorted, written to the cache and returned.

    :param single_word: string variable to search for
    :return: list of synonyms; None when the word is not valid, the site
        has no reference for it, or an error was logged
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if not valid_word:
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')
        return None

    check_cache = caching.cache_synonyms(single_word, 'thesaurus_plus')
    if check_cache:
        # Cache hit: return the stored values without touching the network.
        return cleansing.flatten_multidimensional_list(
            [val for val in check_cache.values()])

    try:
        synonyms_list = []
        results_synonym = basic_soup.get_single_page_html(
            f'https://thesaurus.plus/synonyms/{single_word}/category/noun'
        )
        soup = BeautifulSoup(results_synonym, "lxml")
        # The site reports an unknown word with this exact page title.
        no_word = soup.find('title', text='404. Page not found')
        if no_word:
            logger.error(
                f'thesaurus.plus has no reference for the word {single_word}'
            )
            logger.error(
                f'Please verify that the word {single_word} is spelled correctly.'
            )
            return None

        # The first <li> of the list is skipped (presumably a header or the
        # searched word itself -- confirm against the page markup).
        parent_node = soup.find('ul', {
            'class': 'list paper'
        }).findAll('li')[1:]
        for children in parent_node:
            for child in children.findAll(
                    'div', {'class': 'action_pronounce'}):
                # NOTE(review): the term is recovered from the string form
                # of the attribute dict and relies on 'data-term' being the
                # second comma-separated field; kept as-is to preserve the
                # existing output.
                split_dictionary = str(child.attrs).split(',')
                synonyms_list.append(split_dictionary[1].replace(
                    "'data-term':", "").replace("'", ""))

        # Normalize and sort once after collection; the list used to be
        # rebuilt and re-sorted on every inner-loop iteration.
        synonyms = sorted([
            cleansing.normalize_space(i)
            for i in synonyms_list
        ])
        caching.insert_word_cache_synonyms(single_word,
                                           'thesaurus_plus',
                                           synonyms)
        return synonyms
    except IndexError as e:
        logger.error(
            'A IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.HTTPError as e:
        logger.error('A HTTP error has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.ConnectionError as e:
        # Previously the message was a bare string expression (never logged)
        # and the traceback was guarded by `requests.codes`, which has no
        # relation to the caught error. Both are now logged unconditionally.
        logger.error('Failed to establish a new connection')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.Timeout as e:
        logger.error('A connection timeout has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.RequestException as e:
        logger.error('An ambiguous exception occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None