def fetch_wikidata(title: str) -> Dict:
    api_data = {}
    wikipedia = MediaWiki()
    try:
        page_data = wikipedia.page(title)
    except Exception:
        return {'message': '[wikipedia] error getting wikidata'}
    # Fetch the page attributes concurrently; each property access triggers
    # its own API call, so a small thread pool speeds things up.
    pool = ThreadPoolExecutor(max_workers=6)
    poster = pool.submit(lambda: page_data.logos)
    content = pool.submit(lambda: page_data.sections)
    categories = pool.submit(lambda: page_data.categories)
    images = pool.submit(lambda: page_data.images)
    summary = pool.submit(lambda: page_data.summary)
    title_ = pool.submit(lambda: page_data.title)
    api_data['poster'] = poster.result()
    api_data['contents'] = content.result()
    api_data['categories'] = categories.result()
    api_data['images'] = images.result()
    api_data['summary'] = summary.result()
    api_data['title'] = title_.result()
    return api_data
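
# A minimal usage sketch for fetch_wikidata (illustrative, not from the original
# source), assuming the snippet's dependencies are imported:
#   from typing import Dict
#   from concurrent.futures import ThreadPoolExecutor
#   from mediawiki import MediaWiki
data = fetch_wikidata('Python (programming language)')  # arbitrary example title
if 'message' not in data:
    print(data['title'])
    print(data['summary'][:200])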
def __init__(self, url='https://{lang}.wikipedia.org/w/api.php', lang='en',
             timeout=None, rate_limit=False,
             rate_limit_wait=timedelta(milliseconds=50)):
    ''' overloaded init '''
    MediaWiki.__init__(self, url=url, lang=lang, timeout=timeout,
                       rate_limit=rate_limit, rate_limit_wait=rate_limit_wait)
def __init__(self, coord, route):
    """ Function that instantiates a WikimediaApi object """
    self.lat = str(coord['lat']) if coord else ""
    self.lng = str(coord['lng']) if coord else ""
    self.route = route
    self.wikipedia = MediaWiki(lang=u'fr')
def keywordExtraction():
    # Extract the keyword from the user's input
    while True:
        keyword_sentence = input()
        if " is " in keyword_sentence:
            # for questions like "Who is - ?"
            keyword_sentences = keyword_sentence.split("is ")
        elif " are " in keyword_sentence:
            # for questions like "Who are the - ?"
            keyword_sentences = keyword_sentence.split("are ")
        elif " about " in keyword_sentence:
            # for questions like "What do you know about - ?"
            keyword_sentences = keyword_sentence.split("about ")
        elif " in " in keyword_sentence:
            # for sentences like "I'm interested in - "
            keyword_sentences = keyword_sentence.split("in ")
        elif " when " in keyword_sentence:
            # for sentences like "I want to know when - "
            keyword_sentences = keyword_sentence.split("when ")
        elif " where " in keyword_sentence:
            # for sentences like "I want to know where - "
            keyword_sentences = keyword_sentence.split("where ")
        else:
            # not one of the patterned questions: check whether the input itself
            # has a page on Wikipedia (e.g. the user typed only the keyword)
            wikipedia_mediawiki = MediaWiki()
            try:
                # check without auto-suggest
                wikiPage = wikipedia_mediawiki.page(keyword_sentence, auto_suggest=False)
                return [keyword_sentence, False]  # False = auto-suggest OFF
            except Exception:
                try:
                    # check with auto-suggest
                    wikiPage = wikipedia_mediawiki.page(keyword_sentence)
                    return [keyword_sentence, True]  # True = auto-suggest ON
                except Exception:
                    print("I'm sorry, the information you want is not available on Wikipedia! Try something else!")
                    log("keywordEX,null")
                    continue
        keyword_sentences = keyword_sentences[1].split("?")
        [page, auto_suggest] = checkWiki(keyword_sentences[0])
        if page:  # a Wikipedia page about the keyword exists
            return [keyword_sentences[0], auto_suggest]
class WikiProvider(LookupProvider):
    '''Concrete provider which provides web results from Wikipedia.'''

    def __init__(self):
        '''Initialize WikiProvider with a MediaWiki instance.'''
        self._wiki = MediaWiki(
            user_agent="word_tools (https://github.com/ncdulo/word_tools)")
        LookupProvider.__init__(self)

    def lookup(self, word, limit=0):
        '''Yield str results for `word` up to `limit`. When `limit <= 0`,
        default to `limit = 3`.
        '''
        # Default to a limit of three results. Once the re-write of CLI
        # is complete, this should be updated, and likely removed
        if limit <= 0:
            limit = 3
        try:
            for result in self._wiki.opensearch(word, results=limit):
                title, _, url = result
                summary = self._wiki.page(title).summarize(chars=200)
                output = title + ' (' + url + ')\n' + summary
                yield output
        except exceptions.DisambiguationError as e:
            print('''Search term is ambiguous. There are some issues in the way
results are returned. Wikipedia suggests the following page names. These may
not be correct. This is a known issue.''')
            print(e)
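
# A minimal usage sketch for WikiProvider (illustrative; assumes the
# LookupProvider base class and `from mediawiki import MediaWiki, exceptions`
# used by the snippet above are available):
provider = WikiProvider()
for entry in provider.lookup('python', limit=2):  # 'python' is an arbitrary query
    print(entry)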
class WikiScratcher:
    def __init__(self, category):
        self.wikipedia = MediaWiki(url='https://en.wikipedia.org/w/api.php',
                                   user_agent='wiki-data-loader',
                                   lang='en')
        self.category = category

    # returns {pagename: {sectionname: section}, ....}
    def get_sections(self, num_pages):
        res = {}
        page_titles = self.wikipedia.categorymembers(
            self.category, results=num_pages, subcategories=False)
        if len(page_titles) < num_pages:
            print('Only ' + str(len(page_titles)) + ' pages found !!!')
        for p_title in page_titles:
            res[p_title] = {}
            p = self.wikipedia.page(p_title)
            # add the summary
            res[p_title]['summary'] = p.summary
            # add all other sections
            section_titles = p.sections
            for s_title in section_titles:
                # ignore sections like 'references' or 'see also'
                if self._ignore_section(s_title):
                    continue
                section_text = p.section(s_title)
                # ignore empty sections which are in fact most likely subheaders
                if len(section_text) > 0:
                    res[p_title][s_title] = section_text
        return res
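
# A minimal usage sketch for WikiScratcher (illustrative; 'Machine learning' is
# an arbitrary example category, and the _ignore_section helper is assumed to
# be defined elsewhere on the class):
scratcher = WikiScratcher('Machine learning')
sections = scratcher.get_sections(num_pages=5)
for page_title, page_sections in sections.items():
    print(page_title, list(page_sections))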
def response(self, txObject):
    super(WikiLayer, self).response(txObject)
    if self.check_cmd(COMMANDS["WIKI"]["name"], txObject):
        key_value = parse_cmd_value(txObject[PROCESSED_INPUT])
        response_value = None
        try:
            wikipedia = MediaWiki()
            try:
                response_value = wikipedia.page(key_value).summary
            except DisambiguationError as e:
                response_value = str(e)
            txObject[PROCESSED_INPUT] = response_value
            STOPLAYER.send()
        except ConnectionError as e:
            txObject[PROCESSED_INPUT] = str(e)
    return txObject
async def moegirl_search(q):
    moegirlwiki = MediaWiki(url='http://zh.moegirl.org/api.php')
    t = moegirlwiki.search(q)
    if len(t) == 0:
        return False
    p = moegirlwiki.page(t[0])
    return p.summary
def __init__(self):
    self.wikipedia = MediaWiki(lang=u'fr')
    self._latitude = None
    self._longitude = None
    self._response = None
    self._url = None
    self._summary = None
def wikipedia_summary(topic, lang='en'):
    wikipedia = MediaWiki(lang=lang)
    search = wikipedia.search(topic)
    page = wikipedia.page(search[0])
    summary = wikipedia.summary(search[0])
    text = '**{}**\n\n{}\n**Read more at:** [{}]({})'.format(
        page.title, summary, page.title, page.url)
    return text
class StoryTeller:
    """docstring"""

    def __init__(self):
        self.wikipedia = MediaWiki(lang=u'fr')
        self._latitude = None
        self._longitude = None
        self._response = None
        self._url = None
        self._summary = None

    def set_position(self, latitude, longitude):
        """docstring"""
        self._latitude = latitude
        self._longitude = longitude
        if self._latitude is None and self._longitude is None:
            self._response = []
        else:
            self._response = self.wikipedia.geosearch(
                latitude=self._latitude, longitude=self._longitude)

    def choice_title(self):
        """docstring"""
        return random.choice(self._response)

    def get_informations(self):
        """docstring"""
        if self._response == []:
            return [self._summary, self._url]
        else:
            page = self.wikipedia.page(self.choice_title())
            self._summary = page.summary
            self._url = page.url
            return [self._summary, self._url]
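
# A minimal usage sketch for StoryTeller (illustrative; assumes `import random`
# and `from mediawiki import MediaWiki`; the Paris coordinates are an example):
teller = StoryTeller()
teller.set_position(48.8566, 2.3522)
summary, url = teller.get_informations()
print(summary)
print(url)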
def get_prediction():
    wikipedia = MediaWiki()
    word = request.args.get('word')
    # Set stop-words language
    stop_words = get_stop_words('english')
    # split the query and drop stop words
    filtered_sentence = word.split()
    response = []
    for each in filtered_sentence:
        if each not in stop_words:
            response.append(each)
    string_query = ' '.join(response)
    serviceurl = 'https://maps.googleapis.com/maps/api/geocode/json?'
    address = string_query
    if len(address) < 1:
        return
    try:
        url = serviceurl + "key=" + app.config['KEY_API'] + \
            "&" + urllib.parse.urlencode({'address': address})
        uh = urllib.request.urlopen(url)
        data = uh.read().decode()
        js = json.loads(data)
    except Exception:
        print('==== Failure URL ====')
        js = None
    if not js or 'status' not in js or js['status'] != 'OK':
        print('==== Failure To Retrieve ====')
        print(js)
    else:
        lat = js["results"][0]["geometry"]["location"]["lat"]
        lng = js["results"][0]["geometry"]["location"]["lng"]
        # send coordinates to MediaWiki
        query = wikipedia.geosearch(str(lat), str(lng))
        # keep the first answer
        history = query[0]
        # ask MediaWiki for its summary
        summary = wikipedia.summary(history)
        # return the summary to the HTML view
        return jsonify({'html': summary})
def apiWikipedia(search, language):
    print(language, search)
    if language == 'pt':
        language = 'pt-br'
    wikipedia = MediaWiki(lang=language)
    if len(wikipedia.search(search)) < 1:
        raise Exception('apiWikipedia: Content not found')
    page = wikipedia.page(search)
    return page.summary, page.url
def open_webpage(page_name):
    """ return the full content of any page from wikipedia """
    wiki = MediaWiki()
    page = wiki.page(page_name)
    content = page.content
    return content
def webpage_content(page_name):
    """ return the summary of any page from wikipedia """
    wiki = MediaWiki()
    page = wiki.page(page_name)
    summary = page.summary
    return summary
def getTopMatchesUsingCorrelation(keyword, links, numMatches):
    # Calculate correlation: download each link and, for each link, find out
    # how many times the current keyword occurs in its page.
    keywordOccurenenceMap = {}
    remainingLinkSet = set(links)
    wikipedia = MediaWiki()
    # First get all links from db/cache
    articlesInCache = WikiArticles.objects.filter(title__in=links)
    for articleInCache in articlesInCache:
        # How many times is this keyword in the link's associated Wikipedia page
        title = articleInCache.title
        html = articleInCache.text
        text = covertHtml2Text(html)
        # Note that we are using link here and title as first argument
        addToKeywordOccurenceMap(title, text, keyword, keywordOccurenenceMap)
        # Remove from the set, so that at the end we know which keywords we
        # still need to fetch from Wikipedia
        remainingLinkSet.remove(articleInCache.title)
    newWikiArticles = []
    for link in remainingLinkSet:
        try:
            l.warning("analyzing " + link)
        except Exception as e:
            l.warning("1 rags")
        linkPage = None
        try:
            linkPage = wikipedia.page(link)
        except Exception as e:
            # TODO: Log error
            continue
        if linkPage is None or linkPage == "":
            raise Exception(
                "Wikipedia page not found/or is empty for keyword " + link)
        title = linkPage.title
        html = linkPage.html
        text = covertHtml2Text(html)
        # Note that we are using link here and title as first argument
        addToKeywordOccurenceMap(link, text, keyword, keywordOccurenenceMap)
        # bulk update
        # newWikiArticle = WikiArticles(title=title, text=text)
        # newWikiArticles.append(newWikiArticle)
        try:
            WikiArticles.objects.create(title=title, text=text)
        except Exception as e:
            l.warning("Failed to save " + title)
            l.warning(str(e))
            # continue silently
    # WikiArticles.objects.bulk_create(newWikiArticles, batch_size=DJANGO_BULK_CREATE_BATCH_SIZE)
    return keywordOccurenenceMap
def make_geosearch(self, lat, lon):
    wikipedia = MediaWiki()
    wikipedia_result = wikipedia.geosearch(lat, lon)
    try:
        opensearch_result = self.make_opensearch(wikipedia_result[0])
        return opensearch_result[0][1], wikipedia_result[0]
    except IndexError:
        return "this is a very nice place but i do not have any story about this place."
def lookup(self, word):
    wikipedia = MediaWiki()
    # wikipedia.set_api_url('https://en.wikpedia.org/w/api.php')
    summary = ''
    search_results = wikipedia.opensearch(word)
    if len(search_results) > 0:
        page_title = search_results[0][0]
        page = wikipedia.page(page_title)
        # keep only the first sentence of the page summary
        parts = page.summary.split('. ')
        summary = parts[0]
    return summary
def import_wiki(article_title):
    """
    Fetch the text of a Wikipedia page based on the article title and
    return the article content.

    article_title: The title of the Wikipedia article (a string)
    """
    wikipedia = MediaWiki()
    article = wikipedia.page(article_title)
    # print(article.title)
    return article.content
def find_short_meaning(search):
    try:
        wikipedia = MediaWiki()
        meaning = wikipedia.page(search.title())
    except DisambiguationError:
        return find_alter_meaning(search)
    else:
        if search.lower() != meaning.title.lower():
            return find_alter_meaning(search)
        def_meaning = meaning.summarize()
        return str(def_meaning + "\nLink for further reading: "
                   + wikipedia.opensearch(f'{meaning.title}', results=1)[0][2])
def checkWiki(keyword):
    # Check whether a Wikipedia page corresponding to the keyword exists
    wikipedia_mediawiki = MediaWiki()
    try:
        # check without auto-suggest
        wikiPage = wikipedia_mediawiki.page(keyword, auto_suggest=False)
        return [True, False]  # True = the page exists, False = auto-suggest OFF
    except Exception:
        try:
            # check with auto-suggest
            wikiPage = wikipedia_mediawiki.page(keyword)
            return [True, True]  # True = the page exists, True = auto-suggest ON
        except Exception:
            print("I'm sorry, the information you want is not available on Wikipedia! Try something else!")
            return [False, False]  # False = the page doesn't exist
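
# A minimal usage sketch for checkWiki (illustrative; 'Alan Turing' is an
# arbitrary example keyword):
exists, auto_suggest = checkWiki('Alan Turing')
print(exists, auto_suggest)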
def return_answer(self):
    """Return a dictionary of the form
    {'result': 2, 'commentary': "sentence from bot", 'latitude': number,
     'longitude': number, 'adress': "info", 'summary': "text",
     'link_wiki': "url"}.
    2 = result found and wiki found, 1 = result found but no wiki,
    0 = not found. When the result is 0 there is no latitude, longitude,
    or summary."""
    # if the result from parsing is null
    if self.sentence == "Error":
        self.result['result'] = 0
        self.result['commentary'] = random.choice(GENERIC_NO_ANSWER)
    # if there is a result
    else:
        # create the googlemaps client
        gmaps = googlemaps.Client(key=os.environ.get("BACKEND_KEY", ""))
        returned_list = gmaps.geocode(self.sentence)
        # if the result is empty, return a message and a number
        # that lets the ajax caller know
        if not returned_list:
            self.result['result'] = 0
            self.result['commentary'] = random.choice(GENERIC_NO_ANSWER)
            # answers = 0
        else:
            # local variable holding the first googlemaps answer
            best_result = returned_list[0]
            compile_dic(best_result, self.result)
            wikipedia = MediaWiki(lang='fr')
            t = wikipedia.geosearch(latitude=self.result["latitude"],
                                    longitude=self.result["longitude"])
            # if wiki does not have stories regarding that place
            if not t:
                self.result['result'] = 1
                self.result['commentary'] = random.choice(GENERIC_LOC_FOUND)
            # if wiki has full info
            else:
                self.result['result'] = 2
                self.result['commentary'] = random.choice(GENERIC_LOC_FOUND)
                p = wikipedia.page(t[0])
                self.result["summary"] = p.summary[:250] + "..."
                self.result["link_wiki"] = p.url
    return self.result
def get_wikipedia_article(s_word):
    try:
        wikipedia = MediaWiki(url=wikiurl)
        wp_words = wikipedia.search(s_word, results=1)
        wp_article = wikipedia.page(wp_words[0])
        return wp_article
    except DisambiguationError as e:
        # pick a random option from the disambiguation page
        wp_article = wikipedia.page(random.choice(e.options))
        return wp_article
    except Exception as e:
        app.logger.info('Exception')
        app.logger.info(e)
        return False
class WikiMedia:
    """Wikipedia class."""

    def __init__(self):
        self.wikipedia = MediaWiki()
        self.wikipedia.language = "fr"

    def get_infos(self, query):
        """Retrieve information from wikipedia.fr."""
        try:
            titles = self.wikipedia.search(query)
            if len(titles) > 0:
                infos = self.wikipedia.page(titles[0])
                summary = self.wikipedia.summary(titles[0], sentences=3)
                # Regex to remove == string == headings from the summary:
                summary = re.sub(r"={2}\s.+={2}", r"", summary)
                status = True
                url = infos.url
            # Return empty results if no titles are returned from the API
            else:
                summary = ""
                url = ""
                status = False
        # Use one except block for disambiguation errors. It allows searching
        # for the next title when the first one leads to a disambiguation error.
        except mediawiki.exceptions.DisambiguationError:
            if len(titles) > 1:
                try:
                    infos = self.wikipedia.page(titles[1])
                    summary = self.wikipedia.summary(titles[1], sentences=3)
                    summary = re.sub(r"={2}\s.+={2}", r"", summary)
                    url = infos.url
                    status = True
                except mediawiki.exceptions.DisambiguationError:
                    summary = ""
                    url = ""
                    status = False
                    logging.exception("Exception occurred")
            else:
                summary = ""
                url = ""
                status = False
                logging.exception("Exception occurred")
    return {"summary": summary, "url": url, "status": status}
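
# A minimal usage sketch for WikiMedia (illustrative; assumes `import re`,
# `import logging`, `import mediawiki`, and `from mediawiki import MediaWiki`;
# the query is an arbitrary example):
wiki = WikiMedia()
infos = wiki.get_infos("Tour Eiffel")
if infos["status"]:
    print(infos["summary"])
    print(infos["url"])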
class WikimediaApi:
    """ Class that interacts with the wikimedia api """

    def __init__(self, coord, route):
        """ Function that instantiates a WikimediaApi object """
        self.lat = str(coord['lat']) if coord else ""
        self.lng = str(coord['lng']) if coord else ""
        self.route = route
        self.wikipedia = MediaWiki(lang=u'fr')

    def geosearch(self):
        """ Function that returns a list of wikipedia pages near the coordinates """
        try:
            geores = self.wikipedia.geosearch(self.lat, self.lng, results=5)
        except Exception:
            geores = []
        return geores

    def get_pagetitle(self):
        """ Function that returns the title of a page that matches the route """
        geores = self.geosearch()
        pagetitle = ""
        try:
            regex_route = r"" + self.route
            for i in range(len(geores)):
                if re.match(regex_route, geores[i]):
                    pagetitle = geores[i]
        except Exception:
            pass
        if not pagetitle:
            pagetitle = geores[0] if geores else ""
        return pagetitle

    def get_about(self):
        """ Function that returns a summary and the url of a wikipedia page """
        pagetitle = self.get_pagetitle()
        page = self.wikipedia.page(pagetitle) if pagetitle else ""
        about_url = page.url if page else ""
        try:
            regex = r'== Situation et accès ==\n.*'
            section = re.search(regex, page.content).group(0)
            regex_sub = r'== Situation et accès =='
            about_text = (re.sub(regex_sub, "", section)).strip()
        except Exception:
            about_text = page.summary if page else ""
        return {"about_text": about_text, 'about_url': about_url}
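
# A minimal usage sketch for WikimediaApi (illustrative; the coordinates and
# route name are arbitrary examples, not from the original source):
api = WikimediaApi({'lat': 48.8748, 'lng': 2.3504}, "Rue du Faubourg")
about = api.get_about()
print(about['about_text'])
print(about['about_url'])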
def wiki_search(query: str, lang='ru', unquote_percent_encoded=False) -> str:
    # Default: use wikipedia
    from mediawiki import MediaWiki
    wikipedia = MediaWiki(lang=lang)
    result = wikipedia.opensearch(query, results=1)
    if not result:
        return ''
    _, text, url = result[0]
    if unquote_percent_encoded:
        from urllib.parse import unquote
        url = unquote(url)
    return '{} ({})'.format(text, url)
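
# A minimal usage sketch for wiki_search (illustrative; the query is an
# arbitrary example):
print(wiki_search('Москва', lang='ru', unquote_percent_encoded=True))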
def __init__(self, logger=DEFAULT_LOGGER, separate: bool = True, n: int = 3, **kwargs):
    self.profiler = kwargs.get('profiler', DEFAULT_MEASURER)
    self.logger = logger
    self.tagger = SequenceTagger.load('ner-fast')
    self.wikipedia = MediaWiki()
    self.separate = separate
    self.n = n
    self.logger.info("Candidate selector is loaded and ready to use.")
def __init__(self):
    super().__init__()
    from os import path
    from json import loads
    self.plugin_name = path.basename(__file__).rsplit('.')[0]
    self.metadata = PluginUtilityService.process_metadata(f'plugins/extensions/{self.plugin_name}')
    self.plugin_cmds = loads(self.metadata.get(C_PLUGIN_INFO, P_PLUGIN_CMDS))
    self.osrs_wiki_url = self.metadata[C_PLUGIN_SET][P_WIKI_URL]
    self.osrs_user_agent = self.metadata[C_PLUGIN_SET][P_USER_AGENT]
    rprint(f"{self.metadata[C_PLUGIN_INFO][P_PLUGIN_NAME]} "
           f"v{self.metadata[C_PLUGIN_INFO][P_PLUGIN_VERS]} Plugin Initialized.")
    try:
        self.osrs_wiki = MediaWiki(url=self.osrs_wiki_url, user_agent=self.osrs_user_agent)
    except Exception:
        rprint(f"{self.plugin_name} Plugin could not be initialized.")
def __init__(self, title):
    self.title = title
    self.mediawiki = get_page(title)
    self.text = MediaWiki().page(title).content
    # pair up the section splits of the rendered page and of the raw text
    self.sections = [
        Section.create(mediawiki, text)
        for mediawiki, text in zip(
            split_by_section(self.mediawiki),
            split_by_section(self.text))
    ]
    self.sections[0].head = self.title
def getAnchorTags(list_of_names):
    wikipedia = MediaWiki()
    output = []
    for x in list_of_names:
        per = []
        try:
            curr = wikipedia.page(x)
            soup = BeautifulSoup(request.urlopen(curr.url).read(), "html.parser")
            # restrict to the first paragraph of the article
            soup = soup.find('p')
            temp = [tag['href'] for tag in soup.select('a[href]')]
            for g in temp:
                if 'wiki' in g and 'ogg' not in g:
                    k = g[6:]  # strip the leading '/wiki/' prefix
                    per.append(k)
        except DisambiguationError as e:
            per = []
        output += per
    return output