class WikiProvider(LookupProvider):
    '''Concrete provider which provides web results from Wikipedia.'''

    def __init__(self):
        '''Initialize WikiProvider with a MediaWiki instance.'''
        # The parenthesised project URL is now properly closed so the
        # User-Agent header sent to the API is well-formed.
        self._wiki = MediaWiki(
            user_agent="word_tools (https://github.com/ncdulo/word_tools)")
        super().__init__()

    def lookup(self, word, limit=0):
        '''Yield str results for `word` up to `limit`.

        When `limit <= 0`, default to `limit = 3`.

        Each yielded string has the form ``"<title> (<url>)\\n<summary>"``
        where the summary is cut to 200 characters. A search hit whose
        title is ambiguous is reported on stdout and skipped, so the
        remaining hits are still yielded.
        '''
        # Default to a limit of three results. Once the re-write of CLI
        # is complete, this should be updated, and likely removed
        if limit <= 0:
            limit = 3

        for title, _, url in self._wiki.opensearch(word, results=limit):
            # Only the page fetch is guarded: a single ambiguous title must
            # not abort the whole iteration and drop the later results.
            try:
                summary = self._wiki.page(title).summarize(chars=200)
            except exceptions.DisambiguationError as e:
                print('Search term disambiguous. There are some issues in '
                      'the way results are returned. Wikipedia suggests the '
                      'following page names. These may not be correct. '
                      'This is a known issue. ')
                print(e)
                continue
            yield title + ' (' + url + ')\n' + summary
def lookup_wiktionary(word):
    """Look up `word` on English Wiktionary and extract a short definition.

    The first open-search hit is fetched and its plain-text content is
    scanned for ``=== Verb ===``, ``=== Noun ===`` and ``=== Adjective ===``
    section markers. For the first occurrence of each marker, the last
    non-blank line among the three lines following it is kept as that part
    of speech's definition.

    Returns:
        A dict that may contain ``'verb'``, ``'noun'``, ``'adjective'`` and
        ``'definition'`` (the preferred one: verb, then noun, then
        adjective, with a leading ``(...)`` qualifier removed). An empty
        dict is returned when the search has no hits. ``None`` is returned
        when no definition could be extracted or when the lookup failed;
        failures are logged at debug level.
    """
    logger = logging.getLogger(__name__)
    section_keys = (
        ("=== Verb ===", 'verb'),
        ("=== Noun ===", 'noun'),
        ("=== Adjective ===", 'adjective'),
    )
    try:
        wikipedia = MediaWiki()
        wikipedia.set_api_url('https://en.wiktionary.org/w/api.php')
        matches = {}
        search_results = wikipedia.opensearch(word)
        if not search_results:
            return matches

        page = wikipedia.page(search_results[0][0])
        parts = page.content.split("\n")
        for i, raw_line in enumerate(parts):
            part = raw_line.strip()
            for marker, key in section_keys:
                if not part.startswith(marker):
                    continue
                # Try to skip the first two lines after the marker: start
                # with the next line, then prefer later non-blank lines.
                definition = parts[i + 1] if i + 1 < len(parts) else ""
                for offset in (2, 3):
                    if i + offset < len(parts) and parts[i + offset].strip():
                        definition = parts[i + offset]
                # Only the first section of each kind is recorded.
                matches.setdefault(key, definition)
                break

        # Prefer verb, noun then adjective.
        final = (matches.get('verb') or matches.get('noun')
                 or matches.get('adjective') or "")
        if not final:
            # Previously this case surfaced as a swallowed IndexError;
            # the observable result (None) is kept but made explicit.
            logger.debug('no definition found for %r', word)
            return None

        # Strip a leading bracket comment such as "(transitive)". An
        # unterminated bracket leaves the text untouched instead of
        # discarding the whole result.
        if final.startswith('('):
            close = final.find(')')
            if close != -1:
                final = final[close + 1:]
        matches['definition'] = final
        return matches
    except Exception:
        # Best-effort lookup: report and fall through to return None, but
        # let KeyboardInterrupt / SystemExit propagate.
        logger.debug(sys.exc_info())
        return None
def lookup(self, word):
    """Return the first sentence of the top search hit's summary.

    Runs an open search for `word` with a default-configured MediaWiki
    client and, when there is at least one hit, returns the text of that
    page's summary up to the first ``'. '``. Returns an empty string when
    the search yields nothing.
    """
    wiki = MediaWiki()
    hits = wiki.opensearch(word)
    if len(hits) == 0:
        return ''
    top_title = hits[0][0]
    first_sentence = wiki.page(top_title).summary.split('. ')[0]
    return first_sentence
def find_short_meaning(search):
    """Return a short summary plus a link for the page named `search`.

    The page is requested under the title-cased form of `search`. When the
    title is ambiguous, or the resolved page title differs from `search`
    (ignoring case), the lookup is delegated to `find_alter_meaning`.
    Otherwise the page summary is returned with the URL of the first
    open-search hit for the page title appended.
    """
    try:
        wiki = MediaWiki()
        page = wiki.page(search.title())
    except DisambiguationError:
        return find_alter_meaning(search)

    # Guard clause: the wiki resolved the query to a different article.
    if page.title.lower() != search.lower():
        return find_alter_meaning(search)

    summary_text = page.summarize()
    top_hit = wiki.opensearch(f'{page.title}', results=1)[0]
    return str(summary_text + "link for further read: " + top_hit[2])
def wiki_search(query: str, lang='ru', unquote_percent_encoded=False) -> str:
    """Return ``"<text> (<url>)"`` for the first open-search hit of `query`.

    Args:
        query: Search text.
        lang: Language code handed to the MediaWiki client.
        unquote_percent_encoded: When true, percent-escapes in the URL are
            decoded before formatting.

    Returns:
        The formatted hit, or an empty string when nothing was found.
    """
    # Default using wikipedia
    from mediawiki import MediaWiki

    hits = MediaWiki(lang=lang).opensearch(query, results=1)
    if not hits:
        return ''

    _title, text, url = hits[0]
    if unquote_percent_encoded:
        from urllib.parse import unquote
        url = unquote(url)

    return f'{text} ({url})'
def lookup_wikipedia(word):
    """Look up `word` on Wikipedia and return basic facts about the top hit.

    An open search (one result) finds the article; the last path segment of
    the hit's URL is then used as the title for a `wikipediaapi` page
    lookup.

    Returns:
        A dict with ``'thing'`` (search-hit title), ``'answer'`` (page
        summary) and ``'url'`` (canonical URL) when the page exists, an
        empty dict when nothing was found, or ``None`` when the lookup
        raised; failures are logged at debug level.
    """
    logger = logging.getLogger(__name__)
    try:
        wikipedia = MediaWiki()
        final = {}
        search_results = wikipedia.opensearch(word, results=1)
        logger.debug('WIKI SEARCH RESTULS')
        logger.debug(search_results)
        if search_results:
            logger.debug('WIKI SEARCH RESTULS2')
            page_title = search_results[0][0]
            page_link = search_results[0][2]
            # The URL's final path segment is the title used for the
            # second lookup.
            link_title = page_link.split("/")[-1]
            logger.debug('WIKI SEARCH RESTULS3' + link_title)
            # lookup summary/answer
            wiki_wiki = wikipediaapi.Wikipedia('en')
            page_py = wiki_wiki.page(link_title)
            if page_py and page_py.exists():
                logger.debug('WIKI PAGE SEARCH RESTULS')
                logger.debug(page_py)
                # fact fields
                final['thing'] = page_title
                final['answer'] = page_py.summary
                # plus
                final['url'] = page_py.canonicalurl
        logger.debug('WIKI PAGE SEARCH RESTULS FINAL')
        logger.debug(final)
        return final
    except Exception:
        # Best-effort lookup: log and return None, but do not swallow
        # KeyboardInterrupt / SystemExit as the bare `except:` did.
        logger.debug(sys.exc_info())
        return None
async def send_to_wikipedia(word, site):
    """Publish the Wiktionary page for `word` to a site's display topic.

    Despite the name, the lookup goes to the English Wiktionary API. When
    the open search has at least one hit, its URL is published as
    ``{'frame': <url>}`` on ``hermod/<site>/display/show``. Nothing is
    published when there are no hits. Errors are logged at debug level and
    otherwise ignored; task cancellation is always propagated.
    """
    import asyncio

    logger = logging.getLogger(__name__)
    try:
        # lookup in wiktionary and send display message
        wikipedia = MediaWiki()
        wikipedia.set_api_url('https://en.wiktionary.org/w/api.php')
        search_results = wikipedia.opensearch(word)
        if search_results:
            page_link = search_results[0][2]
            await publish('hermod/' + site + '/display/show',
                          {'frame': page_link})
    except asyncio.CancelledError:
        # Cancellation must never be swallowed, otherwise the surrounding
        # task cannot be stopped while awaiting `publish`.
        raise
    except Exception:
        logger.debug(sys.exc_info())
class WikiApi:
    """Helper around Russian-language Wikipedia and Wikiquote lookups."""

    def __init__(self):
        """Create the Wikipedia client and the Wikiquote engine (both 'ru')."""
        self.wikipedia = MediaWiki(lang='ru')
        self.wikiquote = CustomWikiEngine(
            url="https://{lang}.wikiquote.org/w/api.php", lang='ru')

    def quotes(self, *words):
        """Return the concatenated Wikiquote results (2 per word)."""
        results = []
        for word in words:
            results += self.wikiquote.quotes(word, results=2)
        return results

    def quote_page(self, title):
        """Return the Wikiquote page for `title`, or ``{}`` on any error.

        Errors are logged with their traceback and not re-raised.
        """
        response = {}
        try:
            response = self.wikiquote.page(title=title)
        except Exception as e:
            logging.exception(e)
        return response

    def get_pages_by_categories(self, category, limit=10):
        """Return titles of pages that are members of `category`.

        Issues a ``generator=categorymembers`` query against the Russian
        Wikipedia API, e.g.
        ``action=query&generator=categorymembers&gcmtitle=Category:...``.

        Args:
            category: Value sent as ``gcmtitle``.
            limit: Value sent as ``gcmlimit``.

        Returns:
            A list of page titles; empty when the response carries no
            ``query``/``pages`` data.
        """
        url = "https://ru.wikipedia.org/w/api.php"
        params = {
            'action': "query",
            'generator': "categorymembers",
            'gcmtitle': category,
            'gcmlimit': limit,
            'format': "json",
        }
        # The session is closed deterministically instead of being leaked.
        with requests.Session() as session:
            data = session.get(url=url, params=params).json()

        # A response without "query" or "pages" yields no titles rather
        # than a KeyError.
        pages = (data.get('query') or {}).get('pages') or {}
        return [page['title'] for page in pages.values()]

    def movies(self):
        """Placeholder; not implemented.

        Intended query, per the original note:
        https://ru.wikipedia.org/w/api.php?format=xml&action=query&list=embeddedin&einamespace=0&eilimit=500&eititle=Template:Infobox_film
        """
        pass

    def search(self, *words):
        """Return the concatenated Wikipedia search results (4 per word)."""
        results = []
        for word in words:
            results += self.wikipedia.search(word, results=4)
        return results

    def opensearch(self, *words):
        """Return the concatenated Wikipedia open-search results."""
        results = []
        for word in words:
            results += self.wikipedia.opensearch(word)
        return results

    def parse(self, *pages):
        """Return the lead text and, if present, the plot of each page.

        For every title in `pages` the page content is split on its
        section-header lines (``== Title ==``, ``=== Title ===``, ...). The
        text before the first header is appended to the result, followed by
        the body of the first section titled «Сюжет» when there is one.
        Pages that fail to load or parse are logged and skipped.

        Returns:
            A flat list of text fragments in page order.
        """
        results = []
        for page in pages:
            try:
                content = self.wikipedia.page(title=page).content
                # Splitting with a capturing group keeps headers and bodies
                # in one list: [lead, header1, body1, header2, body2, ...].
                # Deriving both from the same split keeps each header
                # aligned with its body, which two independent regexes
                # could not guarantee (multi-word headers shifted the
                # index and returned the wrong section).
                chunks = re.split(
                    r'^(={2,}[^=\n][^\n]*?={2,})[ \t]*$',
                    content,
                    flags=re.MULTILINE,
                )
                results.append(chunks[0])
                for header, body in zip(chunks[1::2], chunks[2::2]):
                    if header.strip('= \t') == 'Сюжет':
                        results.append(body)
                        break
            except Exception as e:
                logging.error(e)
        return results
class Plugin(PluginBase):
    """Chat plugin offering Old School RuneScape lookups.

    Handles three commands: ``price`` (grand-exchange price of an item),
    ``osrs`` (open search on the OSRS wiki) and ``quest`` (summary of a
    quest page scraped from the wiki's HTML).
    """

    # Headings for the first seven "questdetails-info" cells, in the order
    # the cells are found on the page.
    _QUEST_FIELD_LABELS = (
        "Start Point",
        "Difficulty",
        "Description",
        "Length",
        "Requirements",
        "Items Required",
        "Enemies To Defeat",
    )

    def __init__(self):
        """Load plugin metadata and try to create the wiki client."""
        super().__init__()
        from os import path
        from json import loads
        self.plugin_name = path.basename(__file__).rsplit('.')[0]
        self.metadata = PluginUtilityService.process_metadata(f'plugins/extensions/{self.plugin_name}')
        self.plugin_cmds = loads(self.metadata.get(C_PLUGIN_INFO, P_PLUGIN_CMDS))
        self.osrs_wiki_url = self.metadata[C_PLUGIN_SET][P_WIKI_URL]
        self.osrs_user_agent = self.metadata[C_PLUGIN_SET][P_USER_AGENT]
        rprint(
            f"{self.metadata[C_PLUGIN_INFO][P_PLUGIN_NAME]} v{self.metadata[C_PLUGIN_INFO][P_PLUGIN_VERS]} Plugin Initialized.")
        # Defined before the attempt so the later `is None` checks work
        # (instead of raising AttributeError) when construction fails.
        self.osrs_wiki = None
        try:
            self.osrs_wiki = MediaWiki(url=self.osrs_wiki_url, user_agent=self.osrs_user_agent)
        except Exception:
            rprint(f"{self.plugin_name} Plugin could not be initialized.")

    def quit(self):
        """Announce and log plugin shutdown."""
        dprint(f"Exiting {self.plugin_name} plugin...", origin=L_SHUTDOWN)
        log(INFO, f"Exiting {self.plugin_name} plugin...", origin=L_SHUTDOWN)

    def get_metadata(self):
        """Return the metadata object loaded in `__init__`."""
        return self.metadata

    def process(self, text):
        """Dispatch an incoming chat message to the matching command.

        The first character of the stripped message is dropped, the next
        word is the command and the remainder is its parameter. Unknown
        commands are ignored. For a known command the sender's privilege
        is checked first and the wiki client is created if it is missing.
        """
        message = text.message.strip()
        message_parse = message[1:].split(' ', 1)
        command = message_parse[0]

        handlers = {
            "price": self._handle_price,
            "osrs": self._handle_osrs,
            "quest": self._handle_quest,
        }
        handler = handlers.get(command)
        if handler is None:
            return
        if not privileges.plugin_privilege_checker(text, command, self.plugin_name):
            return
        self._ensure_wiki()
        parameter = message_parse[1]
        handler(parameter)

    def _ensure_wiki(self):
        """Create the wiki client on demand if `__init__` could not."""
        if self.osrs_wiki is None:
            self.osrs_wiki = MediaWiki(url=self.osrs_wiki_url, user_agent=self.osrs_user_agent)

    @staticmethod
    def _show(markup):
        """Display `markup` in the GUI as a left-aligned header box."""
        global_settings.gui_service.quick_gui(markup, text_type='header', box_align='left')

    def _handle_price(self, parameter):
        """Show overall, buy and sell average prices for an item."""
        search_criteria = self.manage_search_criteria(parameter)
        all_item_data = self.pull_json(search_criteria)
        if all_item_data is None:
            self._show(f"Could not find '{search_criteria}' on the grand exchange.")
            return

        head_col = global_settings.cfg[C_PGUI_SETTINGS][P_TXT_HEAD_COL]
        ind_col = global_settings.cfg[C_PGUI_SETTINGS][P_TXT_IND_COL]
        item_data_formatted = "<br><font color='{}'>Item:</font> {}<br>Avg. Price: {:,} coins.".format(
            head_col, all_item_data['name'].title(), all_item_data['overall_average'])
        item_data_formatted += "<br><font color='{}'>Buy Avg. Price:</font> {:,} coins.".format(
            ind_col, all_item_data['buy_average'])
        item_data_formatted += "<br><font color='{}'>Sell Avg. Price:</font> {:,} coins.".format(
            ind_col, all_item_data['sell_average'])
        self._show(item_data_formatted)

    def _handle_osrs(self, parameter):
        """Show the open-search hits of the OSRS wiki for `parameter`."""
        self._show(f"Searching the OSRS Wiki for: {parameter}")
        search_results = self.osrs_wiki.opensearch(parameter)
        formatted_results = self.get_choices(search_results)
        if formatted_results is None:
            self._show("OSRS Wiki Results:<br>No search results found.")
            return
        self._show(f"OSRS Wiki Results:<br>{formatted_results}\n")

    def _handle_quest(self, parameter):
        """Show a summary of the quest page named `parameter`."""
        self._show(f"Searching the OSRS Wiki for: {parameter}")
        try:
            page = self.osrs_wiki.page(parameter)
        except exceptions.PageError:
            self._show("OSRS Wiki Results:<br>No search results found.")
            return
        # The None test comes first so it actually protects the attribute
        # access that follows.
        if page is None or "Quests" not in page.categories:
            self._show("OSRS Wiki Results:<br>No search results found.")
            return

        head_col = global_settings.cfg[C_PGUI_SETTINGS][P_TXT_HEAD_COL]
        ind_col = global_settings.cfg[C_PGUI_SETTINGS][P_TXT_IND_COL]
        soup = BeautifulSoup(page.html, 'html.parser')
        tds = soup.find_all('td', class_="questdetails-info")
        pieces = [
            f"<br><u><font color='{ind_col}'>{page.title} Quest Summary</font></u><br><a href='{page.url}'>{page.url}</a>"
        ]
        for i, item in enumerate(tds):
            pieces.append(self._format_quest_cell(i, item, head_col, ind_col))
        self._show("".join(pieces))

    def _format_quest_cell(self, index, cell, head_col, ind_col):
        """Render one quest-details cell as markup.

        Cells 0-6 get a coloured heading. Cells 4, 5 and 6 are rendered as
        bullet lists built from their ``<ul>``/``<li>`` children, or as
        "UNAVAILABLE" when they contain no list. In cell 5 a "Recommended
        Items" heading is inserted after the first list. Every other cell
        contributes its plain text.
        """
        f_text = ""
        if index < len(self._QUEST_FIELD_LABELS):
            # These headings were previously plain strings missing the `f`
            # prefix, so the colour placeholder was emitted literally.
            f_text = f"<br><font color='{head_col}'>{self._QUEST_FIELD_LABELS[index]}:</font><br>"

        if index not in (4, 5, 6):
            return f_text + cell.text

        uls = cell.find_all('ul')
        # find_all returns a (possibly empty) list, never None, so the
        # fallback must test for emptiness to be reachable.
        if not uls:
            return f_text + "UNAVAILABLE"

        for position, ul in enumerate(uls):
            for li in ul.find_all('li'):
                f_text += f"<font color='{ind_col}'>-- </font>{li.text}<br>"
            if index == 5 and position == 0:
                f_text += f"<br><font color='{head_col}'>Recommended Items:</font><br>"
        return f_text

    def get_choices(self, search_results):
        """Format open-search hits as a numbered list of links.

        Returns ``None`` when `search_results` is empty; otherwise markup
        starting with ``<br>`` with one line per hit, using the third
        element of each hit as the URL.
        """
        if not search_results:
            return None
        list_urls = "<br>"
        for i, item in enumerate(search_results):
            completed_url = item[2]
            list_urls += f"<font color='{global_settings.cfg[C_PGUI_SETTINGS][P_TXT_IND_COL]}'>[{i}]</font>: <a href='{completed_url}'>[{completed_url}]</a><br>"
        return list_urls

    def manage_search_criteria(self, search_criteria):
        """Return `search_criteria` as an int if numeric, else lower-cased."""
        try:
            return int(search_criteria)
        except ValueError:
            return search_criteria.lower()

    def pull_json(self, search_criteria):
        """Fetch the price summary and return the entry matching the criteria.

        The whole payload is lower-cased before parsing, so names compare
        case-insensitively against the lower-cased criteria. An entry
        matches when its ``name`` or ``id`` equals `search_criteria`; when
        several match, the last one wins. Returns ``None`` if none match.
        """
        return_item = None
        with urllib.request.urlopen(self.metadata[C_PLUGIN_SETTINGS][P_MAIN_URL]) as url:
            json_data = json.loads(url.read().decode('utf-8').lower())
            for section in json_data:
                json_item = json_data[section]
                if json_item.get('name') == search_criteria or json_item.get('id') == search_criteria:
                    return_item = json_item
        return return_item
def make_opensearch(self, data):
    """Return the open-search results for ``str(data)``.

    A fresh default-configured MediaWiki client is created for each call.
    """
    query = str(data)
    return MediaWiki().opensearch(query)
from mediawiki import MediaWiki

# Ad-hoc check: run an open search whose query carries a "ru:" prefix
# against the default-configured client and print the raw result.
wikipedia = MediaWiki()
res = wikipedia.opensearch(
    "ru:Малоохтинский парк"
)
print(res)