def find_hyponyms(self):
    """
    Purpose
    ----------
    This function queries classicthesaurus_com for hyponyms associated
    with the specific word provided to the Class Hyponyms.

    The cache is consulted first; only when ``self._check_cache()`` returns
    ``False`` is the site's "narrower" page fetched (through
    ``self._proxies`` when that is set). Hyponyms found on follow-up result
    pages are merged into the first page's results before the sorted result
    is written back to the cache.

    Returns
    ----------
    :returns:
        hyponyms: list of hyponyms. A 'No hyponyms were found ...' message
        string is returned when the first page carries the
        'no hyponyms found' marker. None is returned when
        ``self._validate_word()`` is falsy, when the site answers 404, or
        after one of the handled exceptions below has been logged.

    :rtype: list | str | None

    Raises
    ----------
    Nothing is propagated for the following exception types; each one is
    caught, logged with its traceback, and the method returns None:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    if not self._validate_word():
        return None

    check_cache = self._check_cache()
    if check_cache is not False:
        # Cache hit: flatten the cached values into a single list.
        return cleansing.flatten_multidimensional_list(
            list(check_cache.values()))

    def fetch(url):
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            return Query(url).get_single_page_html()
        return Query(url, self._proxies).get_single_page_html()

    try:
        base_url = f'https://www.classicthesaurus.com/{self._word}/narrower'
        response = fetch(base_url)
        if response.status_code == 404:
            logger.info(
                f'Classic Thesaurus had no hyponyms reference for the word {self._word}'
            )
            return None

        soup = BeautifulSoup(response.text, "lxml")
        hyponym = _get_hyponyms(soup)
        if 'no hyponyms found' in hyponym:
            return f'No hyponyms were found for the word: {self._word}'

        number_of_pages = _get_number_of_pages(soup)
        if number_of_pages >= 2:
            # NOTE(review): range() excludes number_of_pages itself; confirm
            # against _get_number_of_pages that the last page is not skipped.
            for page in range(2, number_of_pages):
                sub_html = fetch(f'{base_url}/{page}')
                sub_soup = BeautifulSoup(sub_html.text, 'lxml')
                additional_hyponym = _get_hyponyms(sub_soup)
                # Skip empty results and the "nothing found" marker so the
                # marker text can never end up among the hyponyms.
                if additional_hyponym and 'no hyponyms found' not in additional_hyponym:
                    # union() returns a new set, so the result must be kept;
                    # the original call discarded it.
                    hyponym = hyponym.union(additional_hyponym)

        self._update_cache(sorted(hyponym))
        return sorted(set(hyponym))
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_merriam_webster(self):
    """
    This function queries merriam-webster.com for a definition
    associated with the specific word provided to the Class Definitions

    The dictionary page is fetched through ``self._proxies`` when that is
    set. Definitions are read from the ``dtText`` spans of the first
    ``sb-0`` block inside the ``dictionary-entry-1`` container,
    lower-cased, stripped of colons and surrounding whitespace,
    space-normalized, sorted and written to the cache.

    :returns:
        definitions: definition for a word; None when the site answers 404,
        when the page reports no entry, or after a handled exception has
        been logged

    :rtype: list | None

    The following exception types are caught and logged with their
    traceback rather than propagated:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    try:
        url = f'https://www.merriam-webster.com/dictionary/{self._word}'
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.info(
                f'Merriam-webster.com has no definition reference for the word {self._word}'
            )
            return None

        soup = BeautifulSoup(response.text, "lxml")
        if soup.find(text=regex.compile(r'Words fail us')):
            logger.info(
                f'Merriam-webster.com has no reference for the word {self._word}'
            )
            return None
        if soup.find('h1', {'class': 'mispelled-word'}):
            logger.info(
                f'Merriam-webster.com has no definition reference for the word {self._word}'
            )
            return None

        dictionary_entry = soup.find('div', {'id': 'dictionary-entry-1'})
        definition_container = dictionary_entry.find('div', {'class': 'vg'})
        # Only the first 'sb-0' block is read.
        first_block = definition_container.find_all(
            'span', {'class': 'sb-0'})[0]
        definitions = sorted(
            cleansing.normalize_space(
                entry.text.lower().replace(':', '').strip())
            for entry in first_block.find_all('span', {'class': 'dtText'})
        )
        self._update_cache(definitions)
        return definitions
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_synonym_com(self):
    """
    This function queries synonym.com for a definition
    associated with the specific word provided to the Class Definitions

    The page is fetched through ``self._proxies`` when that is set. When
    the page's ``pagetype`` meta tag has the content 'Term', the text of
    the first ``<p>`` following the ``section-title`` heading is taken,
    everything up to and including a bracketed segment is removed, and the
    lower-cased result is cached and returned as a one-element list.

    :returns:
        definitions: definition for a word; None when the site answers 404,
        when the page shows 'Oops, 404!', when the page type is not 'Term',
        or after a handled exception has been logged

    :rtype: list | None

    The following exception types are caught and logged with their
    traceback rather than propagated:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    try:
        url = f'https://www.synonym.com/synonyms/{self._word}'
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.info(
                f'Synonym.com had no definition reference for the word {self._word}'
            )
            return None

        soup = BeautifulSoup(response.text, "lxml")
        status_tag = soup.find("meta", {"name": "pagetype"})
        if soup.find(text=regex.compile(r'Oops, 404!')):
            logger.info(
                f'Synonym.com had no definition reference for the word {self._word}'
            )
            return None
        if status_tag.attrs['content'] != 'Term':
            return None

        section_heading = soup.find('h3', {'class': 'section-title'})
        dictionary_entry = section_heading.find_next('p').text
        remove_brackets = regex.sub(r'.*?\[.*?\]', '', dictionary_entry)
        # A single definition is produced, so no sorting is required.
        definitions = [remove_brackets.strip().lower()]
        self._update_cache(definitions)
        return definitions
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def find_hypernyms(self):
    """
    Purpose
    ----------
    This function queries classicthesaurus_com for hypernyms associated
    with the specific word provided to the Class Hypernyms.

    ``self._check_cache()`` is consulted first; element 0 of its result
    flags a hit and element 1 carries the cached values. On a miss the
    site's "broader" page is fetched, passing ``self._user_agent`` and
    ``self._proxies`` to ``Query`` only when they are set. Hypernyms from
    follow-up result pages are merged in before the result is cached.

    The result is shaped by ``self._output_format``: 'list' gives a sorted,
    de-duplicated list, 'dictionary' gives ``{word: [...]}`` and 'json'
    gives a JSON string keyed by 'hypernyms'. Cached and freshly fetched
    results are shaped the same way.

    Returns
    ----------
    :returns:
        hypernym: list of hypernyms (or the dictionary / JSON form of it).
        A colorized 'No hypernyms were found ...' message is returned when
        the first page carries the 'no hypernyms found' marker. None is
        returned when ``self._validate_word()`` is falsy, when the output
        format is not recognised, when the site answers 404, when the page
        is flagged as Cloudflare protected, or after one of the handled
        exceptions below has been logged.

    :rtype: list | dict | str | None

    Raises
    ----------
    Nothing is propagated for the following exception types raised while
    fetching or parsing; each one is caught, logged with its traceback,
    and the method returns None:

    AttributeError: Raised when an attribute reference or assignment fails

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys

    TypeError: Raised when an operation or function is applied to an object of inappropriate type

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    if not self._validate_word():
        return None

    def render(words):
        # Shape the result per the configured output format.
        if self._output_format == 'list':
            return sorted(set(words))
        if self._output_format == 'dictionary':
            return {self._word: sorted(set(words))}
        if self._output_format == 'json':
            return json.dumps({'hypernyms': {self._word: sorted(set(words))}},
                              indent=4, ensure_ascii=False)
        return None

    def fetch(url):
        # Only forward the optional settings that are actually configured.
        options = {}
        if self._user_agent is not None:
            options['user_agent'] = self._user_agent
        if self._proxies is not None:
            options['proxies'] = self._proxies
        return Query(url, **options).get_single_page_html()

    check_cache = self._check_cache()
    if check_cache[0] is True:
        cached = cleansing.flatten_multidimensional_list(list(check_cache[1]))
        return render(cached)
    if check_cache[0] is not False:
        return None

    try:
        base_url = f'https://www.classicthesaurus.com/{self._word}/broader'
        response = fetch(base_url)
        if response.status_code == 404:
            logger.info(f'Classic Thesaurus had no hypernyms reference for the word {self._word}')
            return None

        soup = BeautifulSoup(response.text, "lxml")
        cloudflare_protection = CloudflareVerification(
            'https://www.classicthesaurus.com', soup).cloudflare_protected_url()
        if cloudflare_protection is True:
            logger.info('-' * 80)
            logger.info('The following URL has Cloudflare DDoS mitigation service protection.')
            logger.info('https://www.classicthesaurus.com')
            logger.info('-' * 80)
            return None
        if cloudflare_protection is not False:
            return None

        hypernym = _get_hypernyms(soup)
        if 'no hypernyms found' in hypernym:
            return _colorized_text(255, 0, 255,
                                   f'No hypernyms were found for the word: {self._word} \n'
                                   f'Please verify that the word is spelled correctly.')

        number_of_pages = _get_number_of_pages(soup)
        if number_of_pages >= 2:
            # NOTE(review): range() excludes number_of_pages itself; confirm
            # against _get_number_of_pages that the last page is not skipped.
            for page in range(2, number_of_pages):
                sub_html = fetch(f'{base_url}/{page}')
                sub_soup = BeautifulSoup(sub_html.text, 'lxml')
                additional_hypernym = _get_hypernyms(sub_soup)
                # Skip empty results and the "nothing found" marker so the
                # marker text can never end up among the hypernyms.
                if additional_hypernym and 'no hypernyms found' not in additional_hypernym:
                    # union() returns a new set, so the result must be kept;
                    # the original call discarded it.
                    hypernym = hypernym.union(additional_hypernym)

        self._update_cache(hypernym)
        return render(hypernym)
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_collins_dictionary(self):
    """
    This function queries collinsdictionary.com for a definition
    associated with the specific word provided to the Class Definitions.

    The english-thesaurus page is fetched through ``self._proxies`` when
    that is set. The definition is the text of the first ``div.def``
    element that follows the 'form type-def titleTypeSubContainer'
    container; it is written to the cache before being returned.

    :returns:
        definition: definition for a word; None when the site answers 404,
        when the container is missing, or after a handled exception has
        been logged

    :rtype: str | None

    The following exception types are caught and logged with their
    traceback rather than propagated:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    try:
        url = f'https://www.collinsdictionary.com/dictionary/english-thesaurus/{self._word}'
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.error(
                f'Collins Dictionary had no definition reference for the word {self._word}'
            )
            return None

        soup = BeautifulSoup(response.text, "lxml")
        query_results = soup.find(
            'div', {'class': 'form type-def titleTypeSubContainer'})
        if query_results is None:
            logger.error(
                f'Collins Dictionary had no definition reference for the word {self._word}'
            )
            return None

        # find_next is the current spelling of the deprecated findNext alias.
        definition = query_results.find_next('div', {'class': 'def'})
        definition_text = definition.text
        self._update_cache(definition_text)
        return definition_text
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_thesaurus_com(self):
    """
    This function queries thesaurus.com for synonyms associated
    with the specific word provided to the Class Synonyms.

    The browse page is fetched through ``self._proxies`` when that is set.
    Synonyms are the link texts inside the first ``<ul>`` of the
    'word-grid-container' element; they are space-normalized, lower-cased,
    sorted and written to the cache.

    :returns:
        synonyms: list of synonyms; None when the site answers 404, when
        the page heading starts with '0 results for', or after a handled
        exception has been logged

    :rtype: list | None

    The following exception types are caught and logged with their
    traceback rather than propagated:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    try:
        url = f'https://www.thesaurus.com/browse/{self._word}'
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.info(f'Thesaurus.com had no synonym reference for the word {self._word}')
            return None

        soup = BeautifulSoup(response.text, "lxml")
        status_tag = soup.find("h1")
        if status_tag.text.startswith('0 results for'):
            logger.info(f'Thesaurus.com had no synonym reference for the word {self._word}')
            return None

        word_container = soup.find('div', {'data-testid': 'word-grid-container'})
        link_texts = []
        for list_item in word_container.find('ul').find_all('li'):
            link_texts.extend(link.text for link in list_item.find_all('a', href=True))

        # A single sort over the final, lower-cased values is sufficient.
        synonyms = sorted(cleansing.normalize_space(text).lower() for text in link_texts)
        self._update_cache(synonyms)
        return synonyms
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_wordnet(self):
    """
    This function queries wordnet for synonyms associated
    with the specific word provided to the Class Synonyms.

    The WordNet web search page is fetched through ``self._proxies`` when
    that is set. Results are only read when the page has an ``<h3>`` with
    the text 'Noun'. The first child of every link inside the ``<li>``
    items of the page's first ``<ul>`` is collected unless it contains
    'S:', then the values are lower-cased, sorted and written to the cache.

    :returns:
        synonyms: list of synonyms; None when the site answers 404, when
        the search returned no results, when there is no 'Noun' heading,
        or after a handled exception has been logged

    :rtype: list | None

    The following exception types are caught and logged with their
    traceback rather than propagated:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    try:
        url = f'http://wordnetweb.princeton.edu/perl/webwn?s={self._word}'
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.info(f'Wordnet had no synonym reference for the word {self._word}')
            return None

        soup = BeautifulSoup(response.text, "lxml")
        if soup.find(text=regex.compile(r'Your search did not return any results')):
            logger.info(f'Wordnet had no synonym reference for the word {self._word}')
            return None
        # find_all is the current spelling of the deprecated findAll alias.
        if not soup.find_all('h3', text='Noun'):
            logger.info(f'Wordnet had no synonym reference for the word {self._word}')
            return None

        collected = []
        for list_item in soup.find_all("ul")[0].find_all('li'):
            for link in list_item.find_all(href=True):
                # Links whose first child contains 'S:' are skipped.
                if 'S:' not in link.contents[0]:
                    collected.append(link.contents[0])

        synonyms = sorted(x.lower() for x in collected)
        self._update_cache(synonyms)
        return synonyms
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_synonym_com(self):
    """
    This function queries synonym.com for synonyms associated
    with the specific word provided to the Class Synonyms.

    The page is fetched through ``self._proxies`` when that is set. When
    the page's ``pagetype`` meta tag has the content 'Term', synonyms are
    the texts of the ``<li>`` items in the 'section-list' ``<ul>`` inside
    the 'synonyms' data section; they are lower-cased, sorted and written
    to the cache.

    :returns:
        synonyms: list of synonyms; None when the site answers 404, when
        the page shows 'Oops, 404!', when the page type is not 'Term',
        when the synonyms section is missing, or after a handled exception
        has been logged

    :rtype: list | None

    The following exception types are caught and logged with their
    traceback rather than propagated:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    try:
        url = f'https://www.synonym.com/synonyms/{self._word}'
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.info(f'Synonym.com had no synonym reference for the word {self._word}')
            return None

        soup = BeautifulSoup(response.text, "lxml")
        status_tag = soup.find("meta", {"name": "pagetype"})
        if soup.find(text=regex.compile(r'Oops, 404!')):
            logger.info(f'Synonym.com had no synonym reference for the word {self._word}')
            return None
        if status_tag.attrs['content'] != 'Term':
            return None

        # Look the section up once instead of once to test and once to use.
        synonyms_class = soup.find('div', {'data-section': 'synonyms'})
        if synonyms_class is None:
            logger.info(f'Synonym.com had no synonym reference for the word {self._word}')
            return None

        entries = synonyms_class.find('ul', {'class': 'section-list'}).find_all('li')
        synonyms = sorted(word.text.lower() for word in entries)
        self._update_cache(synonyms)
        return synonyms
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        # This handler additionally logs the word that triggered the failure.
        logger.info('\n')
        logger.info(self._word)
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
        logger.info('\n')
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_merriam_webster(self):
    """
    This function queries merriam-webster.com for synonyms associated
    with the specific word provided to the Class Synonyms.

    The thesaurus page is fetched through ``self._proxies`` when that is
    set. Synonyms are only read when the page has a 'function-label'
    paragraph whose text starts with 'Synonyms for'. They are the link
    texts of the 'mw-list' lists inside the 'thes-list-content
    synonyms_list' container, space-normalized, lower-cased, sorted and
    written to the cache.

    :returns:
        synonyms: list of synonyms; None when the site answers 404, when
        the page reports no entry, when the expected label is absent, or
        after a handled exception has been logged

    :rtype: list | None

    The following exception types are caught and logged with their
    traceback rather than propagated:

    AttributeError: Raised when an attribute reference or assignment fails.

    IndexError: Raised when a sequence subscript is out of range

    KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

    TypeError: Raised when an operation or function is applied to an object of inappropriate type.

    bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested
    features is found
    """
    try:
        url = f'https://www.merriam-webster.com/thesaurus/{self._word}'
        # Keep both original call forms: proxies are only passed when set.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.info(f'Merriam-webster.com had no synonym reference for the word {self._word}')
            return None

        soup = BeautifulSoup(response.text, "lxml")
        if soup.find(text=regex.compile(r'Words fail us')):
            logger.info(f'Merriam-webster.com had no synonym reference for the word {self._word}')
            return None
        if soup.find('h1', {'class': 'mispelled-word'}):
            logger.info(f'Merriam-webster.com had no synonym reference for the word {self._word}')
            return None

        # Look the label up once instead of once to test and once to use.
        label = soup.find('p', {'class': 'function-label'})
        if label is None or not label.text.startswith('Synonyms for'):
            return None

        parent_tag = soup.find("span", {'class': 'thes-list syn-list'})
        word_container = parent_tag.find('div', {'class': 'thes-list-content synonyms_list'})
        link_texts = []
        for list_item in word_container.find_all("ul", {'class': 'mw-list'}):
            link_texts.extend(link.text for link in list_item.find_all('a', href=True))

        # A single sort over the final, lower-cased values is sufficient.
        synonyms = sorted(cleansing.normalize_space(text).lower() for text in link_texts)
        self._update_cache(synonyms)
        return synonyms
    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None
def _query_collins_dictionary(self):
    """
    This function queries collinsdictionary.com for synonyms associated
    with the specific word provided to the Class Synonyms.

    ``self._proxies`` is forwarded to ``Query`` only when it is not None.
    Synonyms found on the page are lower-cased, sorted and handed to
    ``self._update_cache`` before being returned.

    :returns:
        synonyms: sorted list of lower-cased synonyms. None is returned
        when the site answers with HTTP 404, when the page shows its
        "no results" heading, when the page has no ``blockSyn`` container,
        or when one of the handled exceptions below occurred.

    :rtype: list

    Handled exceptions (caught here, logged with their traceback through
    ``logger.error`` and not propagated to the caller):
        bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no
        parser with the requested features is found

        AttributeError: Raised when an attribute reference or assignment fails.

        IndexError: Raised when a sequence subscript is out of range

        KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

        TypeError: Raised when an operation or function is applied to an object of inappropriate type.
    """
    try:
        synonyms = []
        url = f'https://www.collinsdictionary.com/dictionary/english-thesaurus/{self._word}'

        # The two call shapes are kept separate on purpose: the proxies are
        # only passed when configured, exactly as the original branches did.
        if self._proxies is None:
            response = Query(url).get_single_page_html()
        else:
            response = Query(url, self._proxies).get_single_page_html()

        if response.status_code == 404:
            logger.info(f'Collins Dictionary had no synonym reference for the word {self._word}')
        else:
            soup = BeautifulSoup(response.text, 'lxml')
            word_found = soup.find('h1', text=f'Sorry, no results for “{self._word}” in the English Thesaurus.')
            if word_found:
                logger.info(f'Collins Dictionary had no synonym reference for the word {self._word}')
            else:
                query_results = soup.find('div', {'class': 'blockSyn'})
                if query_results is not None:
                    # attrs must be a dict; the previous set literal
                    # {'class', '...'} was interpreted as a list of
                    # alternative class values and also matched the
                    # literal CSS class "class".
                    for primary_syn in query_results.find_all('div', {'class': 'form type-syn orth'}):
                        synonyms.append(primary_syn.text)

                    for sub_syn in query_results.find_all('div', {'class': 'form type-syn'}):
                        child = sub_syn.find('span', {'class': 'orth'})
                        # Skip entries without the expected span instead of
                        # losing every synonym to the AttributeError handler.
                        if child is not None:
                            synonyms.append(child.text)

                    synonyms = sorted([x.lower() for x in synonyms])
                    self._update_cache(synonyms)
                    # Hand back a copy rather than the list object that was
                    # given to the cache (the original returned a new list too).
                    return list(synonyms)

    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
def _query_wordhippo(self):
    """
    This function queries wordhippo.com for antonyms associated
    with the specific word provided to the Class Antonyms.

    ``self._user_agent`` and ``self._proxies`` are forwarded to ``Query``
    as keyword arguments only when they are not None. Antonyms found on
    the page are lower-cased, sorted and handed to ``self._update_cache``
    before being returned.

    :returns:
        antonyms: sorted list of lower-cased antonyms. None is returned
        when the site answers with HTTP 404, when the page states that no
        antonyms are known, when ``CloudflareVerification`` does not
        report False for the page, or when one of the handled exceptions
        below occurred.

    :rtype: list

    Handled exceptions (caught here, logged with their traceback through
    ``logger.error`` and not propagated to the caller):
        bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no
        parser with the requested features is found

        AttributeError: Raised when an attribute reference or assignment fails

        IndexError: Raised when a sequence subscript is out of range

        KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys

        TypeError: Raised when an operation or function is applied to an object of inappropriate type
    """
    try:
        antonyms = []
        url = f'https://www.wordhippo.com/what-is/the-opposite-of/{self._word}.html'

        # Forward only the settings that were configured, so each of the four
        # combinations produces the same Query(...) call as before.
        query_options = {}
        if self._user_agent is not None:
            query_options['user_agent'] = self._user_agent
        if self._proxies is not None:
            query_options['proxies'] = self._proxies
        response = Query(url, **query_options).get_single_page_html()

        if response.status_code == 404:
            logger.info(f'Wordhippo.com had no antonym reference for the word {self._word}')
        else:
            soup = BeautifulSoup(response.text, "lxml")
            cloudflare_protection = CloudflareVerification('https://www.wordhippo.com', soup).cloudflare_protected_url()
            if cloudflare_protection is False:
                pattern = regex.compile(r'We do not currently know of any antonyms for')
                if soup.find(text=pattern):
                    logger.info(f'Wordhippo.com had no antonym reference for the word {self._word}')
                else:
                    # A missing 'relatedwords' container surfaces as an
                    # AttributeError on the next line and is logged below.
                    related_tag = soup.find("div", {'class': 'relatedwords'})
                    if related_tag.find("div", {'class': 'wb'}) is not None:
                        containers = related_tag.find_all("div", {'class': 'wb'})
                    else:
                        # Fallback layout: the words sit in table cells.
                        containers = related_tag.find_all('td')

                    for container in containers:
                        # find_all() always yields a (possibly empty) list of
                        # anchors. The table fallback used find(), which
                        # returns one Tag or None, so the loop walked the
                        # anchor's children or raised TypeError on a cell
                        # without a link and discarded every antonym.
                        for link in container.find_all('a', href=True):
                            antonyms.append(link.text)

                    antonyms = sorted([x.lower() for x in antonyms])
                    self._update_cache(antonyms)
                    return antonyms
            elif cloudflare_protection is True:
                logger.info('-' * 80)
                logger.info('The following URL has Cloudflare DDoS mitigation service protection.')
                logger.info('https://www.wordhippo.com')
                logger.info('-' * 80)
                return None

    except bs4.FeatureNotFound as error:
        logger.error('An error occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except AttributeError as error:
        logger.error('An AttributeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except IndexError as error:
        logger.error('An IndexError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except KeyError as error:
        logger.error('A KeyError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    except TypeError as error:
        logger.error('A TypeError occurred in the following code segment:')
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
def _query_thesaurus_com(self):
    """
    This function queries thesaurus.com for antonyms associated
    with the specific word provided to the Class Antonyms.

    ``self._proxies`` and ``self._user_agent`` are passed to ``Query`` as
    keyword arguments only when they are not None. The antonyms are read
    from the second ``word-grid-container`` div of the page, stripped,
    lower-cased, sorted and handed to ``self._update_cache``.

    :returns:
        antonyms: sorted list of lower-cased antonyms. None is returned
        when the site answers with HTTP 404, when the page has no
        ``antonyms`` div, when ``CloudflareVerification`` does not report
        False for the page, or when one of the handled exceptions below
        occurred.

    :rtype: list

    Handled exceptions (caught here, logged with their traceback through
    ``logger.error`` and not propagated to the caller):
        bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no
        parser with the requested features is found

        AttributeError: Raised when an attribute reference or assignment fails

        IndexError: Raised when a sequence subscript is out of range

        KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys

        TypeError: Raised when an operation or function is applied to an object of inappropriate type
    """
    try:
        proxies = self._proxies
        user_agent = self._user_agent

        # Build the optional keyword arguments; unset values are left out so
        # Query receives exactly the arguments that were configured.
        request_options = {}
        if user_agent is not None:
            request_options['user_agent'] = user_agent
        if proxies is not None:
            request_options['proxies'] = proxies

        page = Query(f'https://www.thesaurus.com/browse/{self._word}', **request_options).get_single_page_html()

        if page.status_code == 404:
            logger.info(f'Thesaurus.com had no antonym reference for the word {self._word}')
            return None

        soup = BeautifulSoup(page.text, "lxml")
        protected = CloudflareVerification('https://www.thesaurus.com', soup).cloudflare_protected_url()

        if protected is True:
            banner = '-' * 80
            for message in (banner,
                            'The following URL has Cloudflare DDoS mitigation service protection.',
                            'https://www.thesaurus.com',
                            banner):
                logger.info(message)
            return None

        if protected is not False:
            # Neither True nor False: nothing is logged and nothing returned.
            return None

        if not soup.find("div", {'id': 'antonyms'}):
            logger.info(f'Thesaurus.com had no antonym reference for the word {self._word}')
            return None

        # Index 1 selects the second word grid on the page; an IndexError
        # from a page with fewer grids is logged by the handler below.
        word_grid = soup.find_all("div", {'data-testid': 'word-grid-container'})[1]
        anchors = word_grid.find_all('a', {'class': 'css-pc0050'})
        antonyms = sorted(anchor.text.strip().lower() for anchor in anchors)
        self._update_cache(antonyms)
        return antonyms

    except (bs4.FeatureNotFound, AttributeError, IndexError, KeyError, TypeError) as error:
        # Checked in this order so the first matching type picks the headline.
        headlines = (
            (bs4.FeatureNotFound, 'An error occurred in the following code segment:'),
            (AttributeError, 'An AttributeError occurred in the following code segment:'),
            (IndexError, 'An IndexError occurred in the following code segment:'),
            (KeyError, 'A KeyError occurred in the following code segment:'),
            (TypeError, 'A TypeError occurred in the following code segment:'),
        )
        headline = next(text for exc_type, text in headlines if isinstance(error, exc_type))
        logger.error(headline)
        logger.error(''.join(traceback.format_tb(error.__traceback__)))