async def kw(self, ctx, articles):
    try:
        page = wikia.page(self.wiki, articles)
        # Follow a manual redirect if the summary starts with "REDIRECT <target>"
        if page.summary.lower().startswith("redirect"):
            redirect = " ".join(page.summary.split(" ")[1:])
            page = wikia.page(self.wiki, redirect)
        summary = page.content.split("\n")[0]
        await ctx.send(
            f'**{page.title}:**\n\n{summary}\n\n*{"_".join(page.url.split(" "))}*'
        )
    except Exception:
        await ctx.send(f'No wiki page found for "{articles}".')
def lovecraft(this_id=None):
    wiki = requests.get(
        "https://lovecraft.fandom.com/wiki/Category:Great_Old_Ones")
    soup = BeautifulSoup(wiki.text, 'html.parser')
    links = soup.findAll('a', {'class': 'category-page__member-link'})
    categories = [link.get('title') for link in links]
    if this_id:
        this_old_one = this_id
    else:
        this_old_one = random.choice(categories)
    chosen = wikia.page("Lovecraft", this_old_one)
    try:
        image = chosen.images[0]
    except Exception:
        image = "/static/images/wikia-lc.jpg"
    snippet = (chosen.summary[:100] + '..') if len(chosen.summary) > 100 else chosen.summary
    # chosen.title is the title of the Old One's page, chosen.url is the link
    # to the wiki page, and chosen.images[0] is the URL string of an image,
    # used here to make a daily thumbnail.
    old_one = {
        'id': chosen.title,
        'title': chosen.title,
        'url': chosen.url,
        'image': image,
        'snippet': snippet
    }
    return old_one
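# A minimal usage sketch (hypothetical driver, not part of the original code):
# fetch one random Great Old One and print the fields of the returned dict.
# Assumes the same imports the function relies on: requests, bs4, random, wikia.
if __name__ == "__main__":
    old_one = lovecraft()
    print(old_one['title'])
    print(old_one['url'])
    print(old_one['snippet'])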
async def search(message):
    query = message.ai.get_parameter("search_query")
    wiki = message.config["wiki"]
    # Bail out early if no search query was extracted.
    if query is None:
        await message.reply("Sorry, I couldn't find a search query.", expire_time=5)
        return
    if wiki is None or wiki.lower() == "wikipedia":
        try:
            page = wikipedia.page(query)
            summary = wikipedia.summary(query, sentences=3)
            embed = Embed(title=page.title, url=page.url, description=summary,
                          timestamp=datetime.utcnow())
            try:
                embed.set_thumbnail(url=page.images[0])
            except (IndexError, AttributeError):
                pass
            suggestion = wikipedia.random()
            embed.set_footer(text="Wikipedia | Try asking \"What is {}?\"".format(suggestion))
            await message.reply(embed=embed)
        except (ValueError, wikipedia.WikipediaException):
            await message.reply("Sorry, I have no information for your search query `{}`.".format(query))
    else:
        try:
            results = wikia.search(wiki, query)
            page = wikia.page(wiki, results[0])
            url = page.url.replace(" ", "_")
            embed = Embed(title=page.title, url=url, description=page.summary,
                          timestamp=datetime.utcnow())
            try:
                embed.set_thumbnail(url=page.images[0])
            except (IndexError, AttributeError):
                pass
            embed.set_footer(text="{} wikia".format(wiki))
            await message.reply(embed=embed)
        except (ValueError, wikia.wikia.WikiaError):
            await message.reply("Sorry, I have no information for your search query `{}`.".format(query))
async def wikia(self, ctx, *, term=''):
    '''Searches for and grabs a link to the page of the given title from the set wiki sites.'''
    if not self.wikia_list.get(str(ctx.guild.id)):
        await ctx.send(f'No wikias have been set for this guild. Use the `{self.bot.command_prefix}wikialist` command to add some to the list')
        return
    if term:
        async with ctx.channel.typing():
            w = None
            log.info(f'retrieve "{term}" from wikia...')
            for sub in self.wikia_list.get(str(ctx.guild.id)):
                try:
                    w = wikia_module.page(sub.strip(' []'), term)
                    log.info('page found on {0}...'.format(sub))
                    break  # page found, exit the for loop
                except Exception:
                    w = None
                    log.info(f'page not found in {sub}')
            if w is not None:
                await ctx.send(w.url.replace(' ', '_'))
            else:
                await ctx.send(f':sweat: Sorry, I couldn\'t find a page titled "{term}"...')
    else:
        log.info('no search term...')
        await ctx.send('Use **!wiki <Page Title>** to search and grab a link to the page of that title on the following wiki sites: {}'.format(self.wikia_list[str(ctx.guild.id)]))
async def wiki(self, ctx, *args):
    if not args:
        await ctx.channel.send("Please provide a wiki page to search!")
        return
    message = ' '.join(args)
    try:
        page = wikia.page("monumentammo", message)
    except wikia.WikiaException:
        await ctx.channel.send("Wiki page not found!")
    else:
        output = ""
        content = page.content
        # TODO: Better formatting for wiki stuff
        # content.replace('\n', '\n\n')
        # Send the content in chunks that fit under Discord's message limit.
        while len(content) > 1500:
            output = content[:1500]
            content = content[1500:]
            await ctx.channel.send("```" + output + "```")
        output = content
        await ctx.channel.send("```" + output + "```")
        await ctx.channel.send("Full page at: " + page.url.replace(' ', '_'))
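# The 1500-character chunking above could be factored into a small helper; a
# minimal sketch (hypothetical name, not part of the original bot):
def chunk_text(text, size=1500):
    '''Yield successive fixed-size slices of text.'''
    for start in range(0, len(text), size):
        yield text[start:start + size]

# for chunk in chunk_text(page.content):
#     await ctx.channel.send("```" + chunk + "```")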
def wikia_links(wiki, header):
    '''Generate titles of articles linked from the given article on the wiki'''
    page = wikia.page(wiki, header)
    soup = BeautifulSoup(page.html(), "html.parser")
    for paragraph in soup.find_all('p'):
        for link in paragraph.find_all('a'):
            try:
                yield link['title']
            except KeyError:
                continue
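# A hedged usage sketch for the generator above: because it yields lazily, a
# caller can stop after a few links. The wiki name "Runescape" and the header
# "Dragon" are illustrative placeholders, not values from the original code.
from itertools import islice

for linked_title in islice(wikia_links("Runescape", "Dragon"), 10):
    print(linked_title)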
def getAnyWikiaUrl(site, title):
    try:
        search = wikia.search(site, title)
        title = search[0]
        page = wikia.page(site, title)
        url = page.url.replace(" ", "_")
        return urllib.parse.quote(url, safe="http://")
    except Exception:
        print("api anywikiurl error")
def get_page(self):
    '''Returns page on wikipedia or wikia'''
    if self.wiki == 'wikipedia':
        try:
            page = wikipedia.page(self.header)
            self.links = page.links
            return page
        except wikipedia.exceptions.PageError:
            raise PageNotFound(self.wiki, self.header)
    else:
        try:
            return wikia.page(self.wiki, self.header)
        except wikia.wikia.WikiaError:
            raise PageNotFound(self.wiki, self.header)
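# get_page() raises PageNotFound, which is not defined in this snippet. A
# minimal sketch of what such an exception might look like, assuming it only
# needs to carry the wiki name and the header that failed to resolve:
class PageNotFound(Exception):
    '''Raised when a header cannot be resolved on the given wiki.'''
    def __init__(self, wiki, header):
        super().__init__(f'page "{header}" not found on {wiki}')
        self.wiki = wiki
        self.header = header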
def findCharacter(name):
    try:
        page = wikia.page("marvel", name)
    except Exception:
        print("The character you are looking for does not exist in the Earth-616 universe:", name)
        return 0
    currentUrl = page.url.replace(" ", "_")
    tempSoup = BeautifulSoup(urllib.request.urlopen(currentUrl), "html.parser")
    wiki = ""
    if len(tempSoup('div', {'id': 'messageBox'})) > 0:
        # Disambiguation/message box: follow the first linked page.
        for soupLine in tempSoup('div', {'id': 'mw-content-text'}):
            character = soupLine.find_all("a")
            newUrl = "http://marvel.wikia.com" + character[1]["href"]
            testUrl = character[1]["href"].replace('/wiki/', '')
            break
    elif len(tempSoup('div', {'class': 'conjoined-infoboxes'})) == 0 and len(tempSoup('div', {'class': 'infobox'})) == 0:
        # No infobox: take the first anchor that carries a title attribute.
        for soupLine in tempSoup('div', {'id': 'mw-content-text'}):
            character = soupLine.find_all("a")
            for index in range(len(character)):
                try:
                    print("Title: ", character[index]["title"])
                    #if('Earth-616' in character[index]["title"]):
                    wiki = character[index]["href"]
                    print(wiki)
                    break
                except KeyError:
                    print("Key Error")
        newUrl = "http://marvel.wikia.com" + wiki
        testUrl = wiki.replace('/wiki/', '')
    else:
        newUrl = currentUrl
        testUrl = newUrl.replace('http://marvel.wikia.com/wiki/', '')
    if len(tempSoup('div', {'class': 'conjoined-infoboxes'})) == 0 and len(tempSoup('div', {'class': 'infobox'})) == 0:
        # Still no infobox: recurse on the linked page unless it is clearly
        # from another dimension.
        if 'Earth-616' not in testUrl:
            if 'Earth' in testUrl:
                print("The character you are looking for is from a different dimension:", name)
                return 0
            return findCharacter(testUrl)
        else:
            return newUrl
    else:
        # An infobox exists, so treat the (possibly redirected) page as resolved.
        return newUrl
def main(self):
    """
    Search for an article and return a short excerpt.
    """
    topics = wikia.search("Runescape", self.params.title)
    if isinstance(topics, list) and len(topics) > 0:
        article = wikia.page("Runescape", topics[0])
        print("- " + article.title + "\n")
        if self.params.more:
            print(article.content + "\n")
        else:
            print(article.summary + "\n")
        print(article.url)
async def hpwikia(self, *searchitems):
    searchitem = " ".join(searchitems)
    found = wikia.search("harrypotter", searchitem)[0]
    summary = wikia.summary("harrypotter", found)
    page = wikia.page("harrypotter", found)
    url = page.url
    clear_url = url.replace(' ', '_')
    image = page.images
    if not image:
        image = "https://upload.wikimedia.org/wikipedia/commons/e/e5/Coat_of_arms_placeholder_with_question_mark_and_no_border.png"
    else:
        image = image[-1]
    title = page.title
    embed = discord.Embed(title=title, url=clear_url, description=summary)
    embed.set_thumbnail(url=image)
    await self.client.say(embed=embed)
def download_page(config: Config, io_manager: IOManager, page_name: str) -> Page:
    page = wikia.page(config.wiki_name, page_name)
    page_name_resolved = page.title.replace(" ", "_")
    html = page.html()
    soup = bs4.BeautifulSoup(html, "html.parser")
    outgoing_links = parse_outgoing_links(soup)
    html_path = io_manager.file_writer.write_html(page_name_resolved, html)
    return Page(
        name=page_name_resolved,
        html_path=html_path,
        outgoing_links=outgoing_links,
    )
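# parse_outgoing_links() is called above but not shown. A minimal sketch under
# the assumption that outgoing links are ordinary /wiki/ anchors in the body:
def parse_outgoing_links(soup: bs4.BeautifulSoup) -> list:
    links = []
    for anchor in soup.find_all("a", href=True):
        href = anchor["href"]
        if href.startswith("/wiki/"):
            # Keep just the page-name portion of the link.
            links.append(href[len("/wiki/"):])
    return links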
async def search_wiki(self, ctx: commands.Context, val: typing.Optional[int] = 1, *, arg):
    if val <= 5:
        try:
            query = str(arg)
            search = wikia.search("thedivision", query)
            for i in range(val):
                page = wikia.page("thedivision", search[i])
                url = page.url.replace(' ', '_')
                await ctx.send(f"<{url}>")
        except (ValueError, IndexError):
            # ValueError: no search results; IndexError: fewer results than requested.
            await ctx.send("No results found")
    else:
        await ctx.send("Max of 5 results allowed.")
def getWikiaUrl(site, title):
    try:
        # Map shorthand site names to their wikia subdomains.
        if site == "lolwiki":
            site = "leagueoflegends"
        elif site == "rswiki":
            site = "2007.runescape"
        elif site == "hswiki":
            site = "hearthstone"
        elif site == "rsfi":
            site = "fi.runescape"
        search = wikia.search(site, title)
        title = search[0]
        page = wikia.page(site, title)
        url = page.url.replace(" ", "_")
        return urllib.parse.quote(url, safe="http://")
    except Exception:
        print("api wikiaurl error")
def get_wiki_page(search_term):
    # Get the Wikia page for a particular search term; keep only the best result.
    try:
        list_of_results = wikia.search(sub_wikia='choices-stories-you-play', query=search_term, results=5)
    except ValueError:
        # The search for this term failed.
        return None
    best_result = list_of_results[0]
    if " " not in best_result:
        # This is a single-word title article, which causes problems.
        best_result = get_wiki_page_google(search_term)
    # Return it as a WikiaPage object.
    best_result_object = wikia.page(sub_wikia='choices-stories-you-play', title=best_result)
    return best_result_object
def query_text(inline_query):
    total = 10
    try:
        search_results = wikia.search(WIKI, inline_query.query, total)
        results = []
        for i, page_result in enumerate(search_results):
            try:
                page = wikia.page(WIKI, page_result)
            except Exception:
                break
            title, url = page.title, page.url
            url = url.replace(' ', '%20')
            results.append(telebot.types.InlineQueryResultArticle(str(i), title, url))
        BOT.answer_inline_query(inline_query.id, results)
    except Exception as ex:
        print(ex)
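# Escaping only spaces can leave other unsafe characters (parentheses, quotes)
# in the URL. A hedged alternative using the standard library, shown separately
# rather than edited into the handler above since it changes the output URLs:
from urllib.parse import quote

def safe_wikia_url(page):
    # Percent-encode everything except the scheme and path separators.
    return quote(page.url, safe=":/")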
def page(self, pageName: str):
    '''returns an instance of a wikia page'''
    return wikia.page(self.wiki, pageName)
def get_wikia_url(wikia_title, query):
    try:
        wurl = wikia.page(wikia_title, query)
        return wurl.url.replace(' ', '_')
    except wikia.WikiaError as e:
        print(e)
def wiki_a(wiki, context, query):
    # Initialize
    message = ""
    if context.lower() == "search":
        message = ", \n".join(wikia.search(wiki, query))
    elif context.lower() == "summary":
        try:
            message = wikia.summary(wiki, query)
        except Exception:
            message = ERROR
    elif context.lower() == "toc":
        try:
            sec_list = wikia.page(wiki, query).sections
            # Build a numbered list of sections for use with "section" below
            message = sec_list[0] + " (1)"
            for i, x in enumerate(sec_list[1:]):
                message += ", \n" + x + " (" + str(i + 2) + ")"
        except Exception:
            message = ERROR
    elif context.lower() == "section":
        try:
            # Uses the section number to return the requested section
            sec_list = wikipedia.page(query).sections
            store = query.split(" ", 1)
            sec_num = int(store[0])
            query = store[1]
            message = wikipedia.page(query).section(sec_list[sec_num])
            if message is None:
                # This will happen often because the API is unreliable
                message = "Unable to grab the section text."
        except Exception:
            message = ERROR
    elif context.lower() == "full":
        try:
            message = wikia.page(wiki, query).content
        except Exception:
            message = ERROR
    elif context.lower() == "image":
        try:
            x = ""
            img_list = wikipedia.page(query).images
            # Drop the first letter so the match ignores the leading capital;
            # Wikipedia is strict about upper and lower case.
            for i, x in enumerate(img_list):
                if query[1:] in x:
                    break
            if x == "":
                message = "Image could not be found."
            else:
                resp.message().media(x)
        except Exception:
            message = ERROR
    elif context.lower() == "url":
        try:
            message = wikia.page(wiki, query).url
        except Exception:
            message = ERROR
    else:
        message = "Invalid context. Type '?' for help."
    return message
async def displaypage(self, message_object, args):
    elements = args.split("/")
    if len(elements) > 1:
        try:
            status = await self.pm.client.send_message(message_object.channel, ':information_source:`Looking up wikia page~`')
            await self.pm.client.send_typing(message_object.channel)
            page = wikia.page(elements[0], elements[1])
            url = page.url
            if len(elements) == 2:
                header = '{0} > {1}'.format(elements[0], elements[1])
                content = page.summary
            else:
                header = '{0} > {1} > {2}'.format(elements[0], elements[1], elements[2])
                content = page.section(elements[2])
        except Exception:
            # Page lookup failed; fall back to a search and let the user pick.
            try:
                search = wikia.search(elements[0], elements[1])
                results = ""
                for i, x in enumerate(search, 1):
                    results = results + "{0}: {1}\n".format(i, x)
                await self.pm.client.edit_message(status, ":information_source:**No page found, here's the search results instead**\n```{0}```\n*Select the page you want to view by responding with a number*".format(results))
                response = await self.pm.client.wait_for_message(author=message_object.author)
                try:
                    page = wikia.page(elements[0], search[int(response.content) - 1])
                    header = '{0} > {1}'.format(elements[0], search[int(response.content) - 1])
                    content = page.summary
                    url = page.url
                except Exception:
                    await self.pm.client.edit_message(status, ":exclamation:`Invalid Selection!`")
                    return
            except Exception:
                await self.pm.client.edit_message(status, ":exclamation:`Invalid Wikia or no results found!`")
                return
        tags = ""
        for x in page.sections:
            tags = tags + x + ', '
        if len(content) > 1000:
            content = content[:1000] + "..."
        em = discord.Embed(title='', description="**Summary**\n{0}\n\n**Sub Sections**\n{1}\n\n**Link**\n{2}".format(content, tags, url), colour=0x007AFF, url=url)
        em.set_author(name=header)
        em.set_footer(text="Noku-wikia version 1.0.5", icon_url=self.pm.client.user.avatar_url)
        if len(page.images) > 0:
            em.set_thumbnail(url=page.images[0])
        try:
            await self.pm.client.send_message(message_object.channel, embed=em)
        except Exception:
            # Fall back to a plain-text message if the embed cannot be sent.
            await self.pm.client.send_message(message_object.channel, "***{3}***\n\n**Summary**\n{0}\n\n**Sub Sections**\n{1}\n\n**Link**\n{2}".format(content, tags, url, header))
        try:
            await self.pm.client.delete_message(status)
            await self.pm.client.delete_message(response)
        except Exception:
            pass
else:
    commentCommandError = False
# Term could not be found in database, respond appropriately
if defineKeyword == True and techKeyword == False:
    wikiSearchSuccesful = True
    try:
        wikiSearchResult = wikia.search("smashbros", missingTerm, results=10)[0]
        wikiSearchSummary = wikia.summary("smashbros", wikiSearchResult, chars=500, redirect=True)
        wikiSearchURL = wikia.page("smashbros", title=wikiSearchResult, pageid=None, redirect=True, preload=False).url
        # Escape a trailing ')' so the markdown link still works
        if wikiSearchURL[-1] == ')':
            print("Fixing wiki link")
            wikiSearchURL = wikiSearchURL[:-1] + '\\' + wikiSearchURL[-1:]
    except Exception:
        print("!! Exception thrown when searching the Wiki !!")
        wikiSearchSuccesful = False
    print(wikiSearchSuccesful)
    try:
        if wikiSearchSuccesful == True:
def process_summary_call(post):
    #special("__________________________________________________")
    #special("SUMMARY CALL: %s" % post.id)
    found_link = find_link(post.body)
    replacedbody = post.body.lower().replace('wikiabot', '___uawb___wikiabot')
    term = None
    if re.search(r'wikiabot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$', replacedbody):
        post_body = re.sub(r'wikiabot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$', r'\2', replacedbody).split('___uawb___')[1].split('.')[0].split('?')[0]
        term = post_body.strip()
    elif re.search(r'wikiabot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$', replacedbody):
        post_body = re.sub(r'wikiabot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$', r'\3', replacedbody).split('___uawb___')[1].split('.')[0].split('?')[0]
        term = post_body.strip()
    elif re.search(r"\?\-.*\-\?", replacedbody):
        term = re.search(r"\?\-.*\-\?", post.body.lower()).group(0).strip('?').strip('-').strip()
    special("SUMMARY CALL: %s @ %s" % (''.join(filter(lambda x: x in string.printable, term or '')), post.id))
    if term is None or len(term.strip()) < 2:
        log("EMPTY TERM")
        return (False, False)
    if term.lower().strip() == 'love':
        # Stop answering repeated questions about love.
        #post_reply('*Baby don\'t hurt me! Now seriously, stop asking me about love so many times! O.o What were we discussing about in this thread again?*', post)
        return (False, False)
    #if term.lower().strip() == 'wikiabot':
    #    post_reply('*Me! I know me.*', post)
    #    return (False, False)
    if term.lower().strip() == 'reddit':
        #post_reply('*This place. It feels like home.*', post)
        return (False, False)
    bit_comment_start = ""
    try:
        title = wikia.page(sub_wikia, term).title
        # If it is the main page, don't try to summarise it
        if re.search("[Mm]ain_[Pp]age", title) or re.search("[Ww]iki", title):
            return (False, False)
        if title.lower() != term:
            try:
                discard = wikia.page(sub_wikia, term, redirect=False).title
            except Exception as e:
                if re.search('resulted in a redirect', str(e)):
                    bit_comment_start = "*\"" + term.strip() + "\" redirects to* "
                else:
                    bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* "
        if re.search(r'#', title):
            url = wikia.page(sub_wikia, title.split('#')[0]).url
            sectionurl = url + "#" + title.split('#')[1]
            comment = "*Nearest match for* ***" + term.strip() + "*** *is the section [" + title.split('#')[1] + "](" + sectionurl.replace(')', '\\)') + ") in article [" + title.split('#')[0] + "](" + url + ").*\n\n---\n\n"
            post_reply(comment, post)
            log("RELEVANT SECTION SUGGESTED: %s" % ''.join(filter(lambda x: x in string.printable, title)))
            return (False, False)
        url_string = title
        log("INTERPRETATION: %s" % ''.join(filter(lambda x: x in string.printable, title)))
        return (url_string, bit_comment_start)
    except Exception as e:
        if re.search('.*may refer to:.*', ''.join(filter(lambda x: x in string.printable, str(e)))):
            deflist = ">Definitions for a few of those terms:"
            for idx, val in enumerate(''.join(filter(lambda x: x in string.printable, str(e))).split('may refer to: \n')[1].split('\n')):
                deflist = deflist + "\n\n>1. **" + val.strip() + "**: " + wikia.summary(sub_wikia, val, sentences=1)
                if idx > 3:
                    break
            summary = "*Oops,* ***" + term.strip() + "*** *landed me on a disambiguation page.*\n\n---\n\n" + deflist + "\n\n---\n\n"
            log("ASKING FOR DISAMBIGUATION")
            post_reply(summary, post)
            return (False, False)
        else:
            log("INTERPRETATION FAIL: %s" % ''.join(filter(lambda x: x in string.printable, term)))
            try:
                terms = "\"" + term + "\""
                suggesttitle = str(wikia.search(sub_wikia, terms, results=1)[0])
                log("SUGGESTING: %s" % ''.join(filter(lambda x: x in string.printable, suggesttitle)))
                if suggesttitle.lower() == term:
                    bit_comment_start = ""
                else:
                    bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* "
                if suggesttitle.endswith(')') and not re.search(r'\(', suggesttitle):
                    # Drop an unmatched trailing parenthesis.
                    suggesttitle = suggesttitle[:-1]
                return (suggesttitle, bit_comment_start)
            except Exception:
                trialtitle = wikia.page(sub_wikia, term).title
                if trialtitle.lower() == term:
                    bit_comment_start = ""
                else:
                    bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* "
                log("TRIAL SUGGESTION: %s" % ''.join(filter(lambda x: x in string.printable, trialtitle)))
                if str(trialtitle).endswith(')') and not re.search(r'\(', str(trialtitle)):
                    trialtitle = trialtitle[:-1]
                return (str(trialtitle), bit_comment_start)
            elif re.search('#', tag['href']):
                tag.unwrap()
                continue
            elif not re.search(r'^http://', tag['href']):
                tag.replace_with(tag.text)
                continue
            rep = "[" + tag.text + "](" + urlstart + tag['href'].replace(')', '\\)') + ")"
            discard = tag.replace_with(rep)
        data = s.text  # Post only the first paragraph
    except Exception as e:
        fail("TEXT PACKAGE FAIL: %s" % e)
    if summary_call:
        try:
            term = url_string
            tell_me_text = wikia.summary(sub_wikia, term, redirect=True)
            tell_me_link = wikia.page(sub_wikia, term).url
            title = wikia.page(sub_wikia, term).title
            if re.search(title, tell_me_text):
                summary = re.sub(title, "[**" + title + "**](" + tell_me_link + ")", tell_me_text)
            else:
                summary = "[**" + title + "**](" + tell_me_link + "): " + tell_me_text
            log("INTERPRETATION: %s" % ''.join(filter(lambda x: x in string.printable, title)))
            if re.search(r'#', title):
                summary = wikia.page(sub_wikia, title.split('#')[0]).section(title.split('#')[1])
                if summary is None or ''.join(filter(lambda x: x in string.printable, summary)).strip() == "":
                    page_url = wikia.page(sub_wikia, title.split('#')[0]).url
                    summary = "Sorry, I failed to fetch the section, but here's the link: " + page_url + "#" + title.split('#')[1]
                    if re.search(r'\(', page_url):
                        page_url = process_brackets_links(page_url)
            comment = "*Here you go:*\n\n---\n\n>\n" + summary + "\n\n---\n\n"
            post_reply(comment, post)
use_transcript = False
out_file, in_file = 'zp.txt', 'zp_episodes.txt'
with open(in_file, 'tr') as fin:
    episodes = set([unquote(line) for line in map(str.strip, fin) if line])
done, ready_file = set(), 'zp_done.txt'
if os.path.isfile(ready_file):
    done = set(map(lambda x: x.strip(), open(ready_file)))
episodes -= done
with open(out_file, 'ta+') as fout, open(ready_file, 'at+') as ready:
    for episode in tqdm.tqdm(episodes):
        try:
            page = wikia.page('zeropunctuation', episode)
            if use_transcript:
                transcript = page.section('Transcript')
            else:
                transcript = page.content
            fout.write(f'EPISODE: {page.title}\n\n')
            if transcript is not None:
                fout.write(transcript.strip() + '\n\n\n')
        except WikiaError:
            print(f'could not read `{episode}`')
        else:
            ready.write(f'{episode}\n')
def GetComments():
    ##### Get the comments we've replied to. #####
    posts_replied_to = fetchComments()
    print(posts_replied_to)
    reply = ""
    ##### Get the comments in the subreddit #####
    for comment in subreddit.comments(limit=20):
        ##### Check to see if the post has already been replied to, or if it even needs to be #####
        if str(comment.id) in posts_replied_to or "!spongebot " not in comment.body or ">!spongebot " in comment.body:
            continue
        else:
            ##### New comment that we need to reply to #####
            comment_replied = False
            print("New Comment - " + comment.id)
            ##### Remove the wake word #####
            searchTerm = comment.body.replace("!spongebot ", "")
            ##### See if we can work out the season/episode from the string #####
            ##### Returns False, [True, season, episode], or ["episode name", season, episode] #####
            episode_info = findEpisode(searchTerm)
            if episode_info is not False and episode_info is not None:
                if episode_info[0] == True:
                    ##### We looked, but that episode isn't in the CSV #####
                    reply = "[Sorry](https://vignette.wikia.nocookie.net/spongefan/images/3/3c/Squidward_the_Loser_%3AP.jpg/revision/latest?cb=20130120163943), it doesn't look like there's a Season " + str(episode_info[1]) + " Episode " + str(episode_info[2]) + ", try an episode name."
                else:
                    ##### Split the episodes into their respective segments #####
                    print(episode_info)
                    episodes = episode_info[0].split("/")
                    print(episodes)
                    ##### It's not a special, so there's more than one episode #####
                    if len(episodes) > 1:
                        if episode_info[3] is not None:
                            segment = episode_info[3]
                        else:
                            segment = 0
                        print("Found Segment: " + str(segment))
                        if segment != 0:
                            episode = episodes[segment - 1]
                            print("Extracted Episode Name: " + episode)
                            searchTerm = episode
                        else:
                            ##### Return the episode names with their respective URLs #####
                            reply = "Here's what I found for " + searchTerm + ": \n\n ------------------------------------ \n\n"
                            d = dict(enumerate(string.ascii_lowercase, 1))
                            i = 1
                            for episode in episodes:
                                episode_url = wikia.page("Spongebob", wikia.search("Spongebob", episode)[0]).url
                                reply += "Season " + str(episode_info[1]) + " Episode " + str(episode_info[2]) + d[i] + ": [" + episode + "](" + urllib.parse.quote(episode_url).replace("%3A", ":") + ") \n\n"
                                i = i + 1
                    else:
                        ##### If there's only one, we can just search that name #####
                        print(episodes)
                        searchTerm = episodes[0]
                        print("Found Episode: " + searchTerm)
            ##### If we don't already know what they want #####
            if reply == "":
                ##### Look it up #####
                print(searchTerm)
                search = wikia.search("Spongebob", searchTerm)
                print("Search returned - " + str(search))
                ##### The first result looks good enough #####
                closest = search[0]
                ##### If there's a gallery, we can likely get that page #####
                if "gallery" in closest.lower():
                    closest = closest.replace(" (gallery)", "")
                ##### Get the summary #####
                summary = wikia.page("Spongebob", closest).content.replace(u"\u2018", "'").replace(u"\u2019", "'").replace("\\xa0", " ").replace("0xc2", "").replace("\\xao", "")
                ##### Header for our response #####
                reply = "Here's what I found on the Spongebob Wiki for [" + searchTerm + "](" + urllib.parse.quote(wikia.page("Spongebob", closest).url).replace("%3A", ":") + "): \n\n ------------------------------------ \n\n"
                ##### Keep only the first few paragraphs #####
                paragraphs = summary.split("\n")
                if len(paragraphs) < 3:
                    ##### Fewer than 3 paragraphs, so use them all #####
                    print("Wiki only returned " + str(len(paragraphs)))
                    endIndex = len(paragraphs)
                else:
                    ##### Otherwise, we only want 3 #####
                    endIndex = 3
                ##### If there's no summary, just return an error #####
                if endIndex >= 1:
                    ##### Otherwise, return whatever we can #####
                    for i in range(0, endIndex):
                        paragraph = paragraphs[i].strip()
                        reply += paragraph + "\n\n"
                else:
                    reply = "[Sorry](https://vignette.wikia.nocookie.net/spongefan/images/3/3c/Squidward_the_Loser_%3AP.jpg/revision/latest?cb=20130120163943), I didn't find anything regarding " + searchTerm + ", I usually work best with episode names."
            ##### Footer for our response #####
            reply += "\n\n ------------------------------------ \n\n ^I'm ^a ^[bot](https://vignette.wikia.nocookie.net/spongebob/images/5/54/Robot_Spongebob2.jpg/revision/latest?cb=20130416211248), ^and ^this ^action ^was ^performed ^automatically. \n\n Got a question for the creator? Message the evil genius [here](https://www.reddit.com/message/compose?to=pizzaface97&subject=SpongeBot2000%20Question)"
            print(reply)
            ##### Now let's try to post the comment #####
            try:
                comment.reply(reply)
            except praw.exceptions.APIException:
                ##### There was an issue; stop and try again later #####
                print("Rate Limited -- Ending")
                comment_replied = False
            else:
                ##### We posted our reply #####
                comment_replied = True
            ##### Log it, either way #####
            if comment_replied == True:
                print("Comment Posted - " + comment.id)
                output = "Comment Posted - " + comment.id
                addComment(comment.id, True)
            else:
                print("No Comment Posted")
                output = "No Comment Posted"
            continue
def isearch(self, searchTerm: str):
    '''returns a generator that iterates through the search result pages'''
    for i in wikia.search(self.wiki, searchTerm):
        yield wikia.page(self.wiki, i)
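# A hedged, self-contained equivalent of isearch() without the wrapper class,
# assuming only the `wikia` package; "Runescape" and "dragon" are placeholders:
import wikia

def isearch_standalone(wiki, searchTerm):
    for result in wikia.search(wiki, searchTerm):
        yield wikia.page(wiki, result)

# for hit in isearch_standalone("Runescape", "dragon"):
#     print(hit.title)
#     break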
# -*- coding: utf-8 -*-
"""
Filename: hs_collections_organiser.py
Date created: Mon Aug 24 21:06:15 2020
@author: Julio Hong
Purpose: Read wikitable to generate an Excel file
Steps:
"""
import pandas as pd
from os import path
# I thought this would be useful, but actually not really: it can't even
# scrape all the content properly.
import wikia
from wikia import html

# Adjust the dataframe appearance
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 20)
pd.set_option('display.width', 200)

wikia.summary("Hypixel-Skyblock", "Collections")
collections_page = wikia.page("Hypixel-Skyblock", "Collections")
# I might as well scrape the HTML using another, more commonly-used library.
html = collections_page.html()

# Each skill collection falls under a table with class="wikitable", but each
# item has a less consistent format: it roughly falls under a 'tr' element,
# yet 'tr' is not unique to items and can also apply to tiers, counts, or
# rewards. I can pull the data out, but how to organise it? That's the concern.
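# A hedged sketch of one way to pull the skill-collection wikitables out of the
# scraped HTML: pandas.read_html turns each table with class="wikitable" into a
# DataFrame, which could then be written to Excel (the file name below is a
# placeholder, not from the original script).
from io import StringIO

collection_tables = pd.read_html(StringIO(html), attrs={"class": "wikitable"})
print(len(collection_tables))
print(collection_tables[0].head())
# collection_tables[0].to_excel("hs_collections.xlsx")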