from mediawikiapi import MediaWikiAPI
from bs4 import BeautifulSoup


def headings(page):
    """Return the h3 heading texts of a Wikipedia page, minus any trailing ' (...)' suffix."""
    mediawikiapi = MediaWikiAPI()
    page = mediawikiapi.page(page)
    soup = BeautifulSoup(page.html(), 'html.parser')
    data = []
    for headline in soup.find_all("h3"):
        text = headline.text.strip()
        cut = text.find(' (')
        # find() returns -1 when there is no ' (' suffix; keep the full text in that case
        data.append(text[:cut] if cut != -1 else text)
    return data
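# A minimal usage sketch for headings(); the page title is borrowed from the
# test code further down and is only illustrative.
for section in headings("List of video games notable for negative reception"):
    print(section)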
def searchIntent(self, session: DialogSession):
    if 'UserRandomAnswer' in session.intentName:
        search = session.payload['input']
    else:
        search = self._extractSearchWord(session)

    if not search:
        self._whatToSearch(session, 'whatToSearch')
        return

    mediawikiapi = MediaWikiAPI()
    # Set the language of the results
    mediawikiapi.config.language = self.LanguageManager.activeLanguage

    # Debug code for dev
    if self._devDebug:
        self.logInfo(f'User request = {search}')

    # Store the top 5 titles of the requested search (5 reduces the chance of an index error)
    self._top5Results = mediawikiapi.search(search, results=5)

    # Set an index value for iterating through a list in the dialogs
    index = 0

    if not self._top5Results:
        self.logWarning('No match')
        self._whatToSearch(session, 'noMatch')
        return

    # Remove known ambiguous results
    self.removeKnowenAmbiguousResults()

    # Check for exceptions and return any good result
    self._resultSummary = self.sortThroughResults(wikiInstance=mediawikiapi, index=index)

    # If there are no good answers, log a warning and inform the user
    if not self._resultSummary:
        self.logWarning('No match')
        self._whatToSearch(session, 'noMatch')
    # If a search result was found, say the result and end
    else:
        if self._devDebug:
            self.logInfo(f'result Summary is {self._resultSummary}')
        if not self._alternatveResultUsed:
            self.sayResult(session=session, index=index)
        else:
            self.sayAlternatives(alternatives=self._top5Results[index + 1])
            self.sayResult(session=session, index=index)
async def main():
    print("Running main")
    if config.import_mode:
        finish_all_in_list()
    print("Looking for new DOIs from the Event stream")
    mediawikiapi = MediaWikiAPI()
    count = 0
    count_dois_found = 0
    count_missing_dois = 0
    async for event in aiosseclient(
        'https://stream.wikimedia.org/v2/stream/recentchange',
    ):
        # print(event)
        data = json.loads(str(event))
        # print(data)
        meta = data["meta"]  # what is the difference?
        server_name = data['server_name']
        namespace = int(data['namespace'])
        language_code = server_name.replace(".wikipedia.org", "")
        # for exclude in excluded_wikis:
        #     if language_code == exclude:
        if language_code != "en":
            continue
        if server_name.find("wikipedia") != -1 and namespace == 0:
            title = data['title']
            if data['bot'] is True:
                bot = "(bot)"
            else:
                bot = "(!bot)"
            if data['type'] == "new":
                event_type = "(new)"
            elif data['type'] == "edit":
                event_type = "(edit)"
            else:
                event_type = None
            if event_type is not None:
                print(f"{event_type}\t{server_name}\t{bot}\t\"{title}\"")
                print(f"http://{server_name}/wiki/{quote(title)}")
                dois_count_tuple = process_event(
                    mediawikiapi,
                    language_code=language_code,
                    title=title,
                )
                if dois_count_tuple[0] > 0:
                    count_dois_found += dois_count_tuple[0]
                if dois_count_tuple[1] > 0:
                    count_missing_dois += dois_count_tuple[1]
                count += 1
                print(
                    f"Processed {count} events and found {count_dois_found}"
                    f" DOIs where {count_missing_dois} were missing in WD."
                )
                if config.max_events > 0 and count == config.max_events:
                    exit(0)
async def that_(self, ctx, *args):
    mediawikiapi = MediaWikiAPI()
    # Get two random article titles and a random 24-bit colour number
    rand_articles, rand_num = mediawikiapi.random(2), random.randint(0, 16777215)
    article_md = ['[{}]({})'.format(article, 'https://en.wikipedia.org/wiki/' + article.replace(' ', '_'))
                  for article in rand_articles]
    # Create the embed
    if not args or args[0] not in ['verbose', 'verbosify']:
        # Zero or wrong arguments
        embed = discord.Embed(color=discord.Color(rand_num),
                              description='That is so {1}, can we hit {0} {2}'.format(rand_num, *article_md))
    else:
        # Either verbose or verbosify
        embed = discord.Embed(color=discord.Color(rand_num),
                              description='**That is so {1}, can we hit {0} {2}**'.format(rand_num, *article_md))
        article_descriptions = [mediawikiapi.summary(article, chars=150, auto_suggest=False)
                                for article in rand_articles]
        if args[0] == 'verbose':
            for desc in article_descriptions:
                embed.add_field(name="** **", value=desc, inline=True)
        elif args[0] == 'verbosify':
            for desc in article_descriptions:
                embed.add_field(name="** **", value=verbosify.verbosify(desc), inline=True)
    await ctx.send(embed=embed)
def wikipedia_search(self, question):
    """
    :param self: self object where the chatbot object is also located
    :param question: the question to look up
    :return: A tuple of values: (response, confidence, stat)
             stat = found or not found
    """
    mw = MediaWikiAPI()
    wiki = wikipediaapi.Wikipedia("en")
    a = mw.search(str(question))
    question = str(question)
    if len(a) >= 1:
        cos = self.chatbot.lp.similarity(question.lower(), a[0].lower())
    else:
        return "Oops, the item you wanted to know about is not on Wikipedia.", 0.9, False
    if cos > 0.9:
        # Confident match: answer directly with the page summary
        self.chatbot.globals["reversei"]["enabled"] = False
        self.chatbot.globals["reversei"]["uid"] = False
        response = wiki.page(a[0]).summary
        confidence = cos
        stat = True
    else:
        # Uncertain match: ask the user to pick from the candidates
        self.chatbot.globals["reversei"]["enabled"] = True
        self.chatbot.globals["reversei"]["uid"] = 30000000002
        self.chatbot.globals["reversei"]["type"] = int
        self.chatbot.globals["temp_data"] = a

        def bracketize(x):
            return "\n[{}] {}".format(x[0] + 1, str(x[1]))

        response = "Did you mean any of these? {}".format(" ".join(
            [bracketize(x) for x in enumerate(a)]))
        confidence = 1.0
        stat = False
    return response, confidence, stat
def wikitable(page):
    """
    Exports a Wikipedia table parsed by BeautifulSoup.
    Deals with spanning: multirow and multicolumn should format as expected.
    """
    mediawikiapi = MediaWikiAPI()
    page = mediawikiapi.page(page)
    soup = BeautifulSoup(page.html(), 'html.parser')
    # Assumption: the original referenced `table` without defining it;
    # parse the first 'wikitable'-classed table on the page
    table = soup.find("table", {"class": "wikitable"})
    rows = table.findAll("tr")
    nrows = len(rows)
    ncols = max([len(r.findAll(['th', 'td'])) for r in rows])

    # Preallocate the table structure
    # (this is required because we need to move forward in the table
    # structure once we've found a row span)
    data = []
    for i in range(nrows):
        rowD = []
        for j in range(ncols):
            rowD.append('')
        data.append(rowD)

    # Fill the table with data:
    # move across cells and use span to fill extra cells
    for i, row in enumerate(rows):
        cells = row.findAll(["td", "th"])
        for j, cell in enumerate(cells):
            cspan = int(cell.get('colspan', 1))
            rspan = int(cell.get('rowspan', 1))
            l = 0
            for k in range(rspan):
                # Shift to the first empty cell of this row
                # to avoid replacing previously inserted content
                while data[i + k][j + l]:
                    l += 1
                for m in range(cspan):
                    data[i + k][j + l + m] += cell.text.strip("\n")
    return data
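# A usage sketch for wikitable(), reusing the page title exercised by the test
# code below; converting the rows into a pandas DataFrame is an added assumption.
import pandas as pd

rows = wikitable('List of video games notable for negative reception')
df = pd.DataFrame(rows[1:], columns=rows[0])  # treat the first row as the header
print(df.head())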
async def this(self, ctx, *args):
    summoned_channel = ctx.message.author.voice_channel
    if summoned_channel is None:
        mediawikiapi = MediaWikiAPI()
        await self.bot.say(
            f'This is so {mediawikiapi.random()}, can we hit {random.randint(0, 10000000)} {mediawikiapi.random()}'
        )
    else:
        r = requests.get('https://www.billboard.com/charts/hot-100')
        soup = BeautifulSoup(r.text, 'html.parser')
        div = soup.find('div', {'class': 'chart-list chart-details__left-rail'})
        songs_list = json.loads(div.attrs['data-video-playlist'])
        songs = [x["title"] for x in songs_list]
        song = random.choice(songs)
        song_split = song.split(' - ')
        await self.bot.say(
            f'This is so sad, Alexa play {song_split[0]} by {song_split[-1]}'
        )
        await ctx.invoke(self.stop)
        await self.playurl(ctx, song=song)
from collections import defaultdict
import unittest

from mediawikiapi import MediaWikiAPI
from tests.request_mock_data import mock_data


# Mock out _wiki_request so the tests never hit the network
class _wiki_request(object):
    calls = defaultdict(int)

    @classmethod
    def __call__(cls, params, config):
        cls.calls[params.__str__()] += 1
        return mock_data["_wiki_request calls"][tuple(sorted(params.items()))]


api = MediaWikiAPI()
api.session.request = _wiki_request()


class TestSearch(unittest.TestCase):
    """Test the functionality of mediawikiapi.search."""

    def test_search(self):
        """Test parsing a mediawikiapi request result."""
        self.assertEqual(api.search("Barack Obama"), mock_data['data']["barack.search"])

    def test_limit(self):
        """Test limiting a request's results."""
        self.assertEqual(api.search("Porsche", results=3), mock_data['data']["porsche.search"])

    def test_suggestion(self):
    to_replace2 = ('who\'s', "what\'s", "whats", "what", "who", " is ", " are ",
                   "who's", "what's", " an ", " a ")
    # Remove everything except the word to search for
    i = remove_all(i, to_replace2)
    send(i, comment)


def read_comment(comment):
    # Check that the comment is valid and not from the bot itself
    if hasattr(comment, 'body') and hasattr(comment.author, 'name') \
            and comment.author.name != "wikipedia_answer_bot":
        check_and_send(comment)


# Reconnect to the server if there's a 503 error (Service Unavailable)
@retry(wait=wait_chain(*[wait_fixed(3) for i in range(3)]
                       + [wait_fixed(7) for j in range(2)]
                       + [wait_fixed(9)]))
def main():
    # Scan all new comments and check them with the functions above
    for comment in subreddit.stream.comments(skip_existing=True):
        read_comment(comment)


if __name__ == '__main__':
    mediawikiapi = MediaWikiAPI()  # Create a Wikipedia API client
    main()
        for j, cell in enumerate(cells):
            cspan = int(cell.get('colspan', 1))
            rspan = int(cell.get('rowspan', 1))
            l = 0
            for k in range(rspan):
                # Shift to the first empty cell of this row
                # to avoid replacing previously inserted content
                while data[i + k][j + l]:
                    l += 1
                for m in range(cspan):
                    data[i + k][j + l + m] += cell.text.strip("\n")
    return data


mediawikiapi = MediaWikiAPI()
test_page = mediawikiapi.page('List of video games notable for negative reception')

# To check the page URL:
print(test_page.url)

soup = BeautifulSoup(test_page.html(), 'html.parser')
# tables = soup.findAll("table", {"class": "wikitable"})
# headings = soup.findAll('h3')
# df_test = wikitable_to_dataframe(tables[0])
# print(df_test)

for headlines in soup.find_all("h3"):
    print(headlines.text.strip()[:headlines.text.strip().find(' (')])


def headings(page):
    mediawikiapi = MediaWikiAPI()
mode = "file" # print(names) toStore = [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []] def remove_accents(input_str): nfkd_form = unicodedata.normalize('NFKD', input_str) return u"".join([c for c in nfkd_form if not unicodedata.combining(c)]) if mode == "wiki": mediawikiapi = MediaWikiAPI() countries = mediawikiapi.page("List_of_national_capitals") table = pd.read_html(countries.url, attrs={"class": "wikitable"})[0] names = table["City/Town"] else: with open("in.txt", "r", encoding="utf8") as f: names = f.readlines() for name in names: name = re.sub('\d*', '', name).lower().strip() second = remove_accents(name)
async def this_(self, ctx, *args):
    # vc = ctx.voice_client
    # if not vc or not vc.is_connected():
    mediawikiapi = MediaWikiAPI()
    await ctx.send(f'This is so {mediawikiapi.random()}, can we hit {random.randint(0, 10000000)} {mediawikiapi.random()}')