def snopes(text):
    """snopes <topic> -- Searches snopes for an urban legend about <topic>."""
    search_page = http.get_html(search_url, sp_q=text, sp_c="1")
    result_urls = search_page.xpath("//a[@target='_self']/@href")

    if not result_urls:
        return "no matching pages found"

    snopes_page = http.get_html(result_urls[0])
    snopes_text = snopes_page.text_content()

    claim = re.search(r"Claim: .*", snopes_text)
    if claim is None:
        return "could not parse the snopes page"
    claim = claim.group(0).strip()

    status = re.search(r"Status: .*", snopes_text)
    if status is not None:
        status = status.group(0).strip()
    else:
        # new-style statuses
        status = "Status: {}.".format(
            re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED", snopes_text).group(0).title())

    claim = re.sub(r"[\s\xa0]+", " ", claim)  # compress whitespace
    status = re.sub(r"[\s\xa0]+", " ", status)

    return "{} {} {}".format(claim, status, result_urls[0])
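# These plugins depend on module-level setup that is not shown in this excerpt.
# A minimal sketch of the assumed header follows; the import paths and the
# search_url value are placeholders, not necessarily what the real plugins use.
import re
import datetime

from util import hook, http, formatting, timesince  # bot utility modules (assumed layout)

search_url = "http://search.snopes.com/search"  # hypothetical Snopes search endpoint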
def pre(inp):
    """pre <query> -- searches scene releases using orlydb.com"""
    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except http.HTTPError as e:
        return 'Unable to fetch results: {}'.format(e)

    results = h.xpath("//div[@id='releases']/div/span[@class='release']/..")
    if not results:
        return "No results found."

    result = results[0]

    date = result.xpath("span[@class='timestamp']/text()")[0]
    section = result.xpath("span[@class='section']//text()")[0]
    name = result.xpath("span[@class='release']/text()")[0]

    # parse date/time
    date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    date_string = date.strftime("%d %b %Y")
    since = timesince.timesince(date)

    size = result.xpath("span[@class='inforight']//text()")
    if size:
        size = ' - ' + size[0].split()[0]
    else:
        size = ''

    return '{} - {}{} - {} ({} ago)'.format(section, name, size, date_string, since)
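# timesince.timesince() is assumed to render a human-readable delta such as
# "2 days" or "3 years". Hypothetical output shape (not a captured run):
#   pre("ubuntu") -> "APPS - Ubuntu.10.04.Desktop-GRP - 698MB - 29 Apr 2010 (5 years ago)"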
def reddit_url(match):
    thread = http.get_html(match.group(1))

    title = thread.xpath('//title/text()')[0]
    upvotes = thread.xpath("//span[@class='upvotes']/span[@class='number']/text()")[0]
    downvotes = thread.xpath("//span[@class='downvotes']/span[@class='number']/text()")[0]
    author = thread.xpath("//div[@id='siteTable']//a[contains(@class,'author')]/text()")[0]
    timeago = thread.xpath("//div[@id='siteTable']//p[@class='tagline']/time/text()")[0]
    comments = thread.xpath("//div[@id='siteTable']//a[@class='comments']/text()")[0]

    return '\x02{}\x02 - posted by \x02{}\x02 {} ago - {} upvotes, {} downvotes - {}'.format(
        title, author, timeago, upvotes, downvotes, comments)
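# reddit_url() takes a regex match rather than command text, so it is
# presumably registered as a regex hook whose first capture group is the full
# thread URL. A sketch of that wiring (the decorator style and pattern are
# assumptions in skybot/CloudBot style, not taken from this excerpt):
#
#     @hook.regex(r'(https?://(?:www\.)?reddit\.com/r/[^ ]+)')
#     def reddit_url(match): ...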
def nhlScores(text=" "):
    """nhl <team city> gets the score or next scheduled game for the specified
    team. If no team is specified all games will be included."""
    response = http.get_html('http://scores.espn.go.com/nhl/bottomline/scores', decode=False)
    game = ""
    score = response.text_content()
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile(r"nhl_s_left\d+=(.*)")
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            game = game + match + " "
    return game
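# The ESPN bottomline feed is one long URL-encoded string of key=value pairs
# joined by '&', roughly (shape illustrative, not captured output):
#
#   nhl_s_left1=Boston%204%20%20Montreal%203%20(FINAL)&nhl_s_right1=...&...
#
# After the %20 -> space and '&' -> newline rewrites above, every
# "nhl_s_leftN=..." line holds one game summary, which the regex then captures.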
def etymology(text, message):
    """<word> - Retrieves the etymology of chosen word."""
    url = 'http://www.etymonline.com/search'
    try:
        params = {'q': text}
        h = http.get_html(url, params=params)
    except Exception:
        return "Error fetching etymology."

    etym = h.xpath('//section')
    if not etym:
        return 'No etymology found for ' + text

    etym = etym[0].text_content()
    etym = ' '.join(etym.split())

    message(formatting.truncate_str(etym, 400))
def scrape_scores(conn, chan, game, text):
    if not text:
        text = " "
    response = http.get_html(
        'http://scores.espn.go.com/{}/bottomline/scores'.format(game), decode=False)
    score = response.text_content()
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile(r"{}_s_left\d+=(.*)".format(game))
    scores = []
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            scores.append(match)
    return page_scores(conn, chan, scores)
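# page_scores() and search_pages are defined elsewhere in this plugin set; a
# sketch of the behavior the caller appears to rely on, given how the *Scores
# commands populate pages for .morescore (names, page size, and the unused
# conn parameter are assumptions, not the originals):
search_pages = {}  # per-channel page cache for .morescore (assumed shared state)


def page_scores(conn, chan, scores):
    # conn is accepted to match the hook signature but unused in this sketch
    if not scores:
        return "No games found."
    # store IRC-sized pages so a .morescore command can step through them
    pages = [" | ".join(scores[i:i + 3]) for i in range(0, len(scores), 3)]
    search_pages[chan] = pages
    search_pages[chan + "index"] = 0
    if len(pages) > 1:
        return "{} (page 1/{}) .morescore".format(pages[0], len(pages))
    return pages[0]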
def nbaScores(chan, text=" "):
    """nba <team city> gets the score or next scheduled game for the specified
    team. If no team is specified all games will be included."""
    search_pages[chan] = []
    search_pages[chan + "index"] = 0
    response = http.get_html('http://scores.espn.go.com/nba/bottomline/scores', decode=False)
    game = ""
    score = response.text_content()
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile(r"nba_s_left\d+=(.*)")
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            game = game + match + " | "
    game = smart_truncate(game)
    game = game[:-2]
    game = two_lines(game, chan)
    if len(search_pages[chan]) > 1:
        return "{}(page {}/{}) .morescore".format(
            game, search_pages[chan + "index"] + 1, len(search_pages[chan]))
    return game
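# smart_truncate() and two_lines() are helpers defined elsewhere; these are
# sketches of the behavior the score commands appear to rely on, not the
# originals (the lengths used here are assumptions):
def smart_truncate(content, length=355):
    # cut on a word boundary so a game entry is not split mid-name
    if len(content) <= length:
        return content
    return content[:length].rsplit(' ', 1)[0]


def two_lines(content, chan):
    # split the output into ~300-char pages; page 0 is returned now and the
    # full list is stored in search_pages[chan] for .morescore to step through
    pages = [content[i:i + 300] for i in range(0, len(content), 300)]
    search_pages[chan] = pages
    search_pages[chan + "index"] = 0
    return pages[0] if pages else content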
def mcwiki(text):
    """mcwiki <phrase> - gets the first paragraph of the Minecraft Wiki article on <phrase>"""
    try:
        j = http.get_json(api_url, search=text)
    except (http.HTTPError, http.URLError) as e:
        return "Error fetching search results: {}".format(e)
    except ValueError as e:
        return "Error reading search results: {}".format(e)

    if not j[1]:
        return "No results found."

    # we remove items with a '/' in the name, because
    # gamepedia uses sub-pages for different languages
    # for some stupid reason
    items = [item for item in j[1] if "/" not in item]

    if items:
        article_name = items[0].replace(' ', '_').encode('utf8')
    else:
        # there are no items without a '/', just return a '/' one
        article_name = j[1][0].replace(' ', '_').encode('utf8')

    url = mc_url + http.quote(article_name, '')

    try:
        page = http.get_html(url)
    except (http.HTTPError, http.URLError) as e:
        return "Error fetching wiki page: {}".format(e)

    for p in page.xpath('//div[@class="mw-content-ltr"]/p'):
        if p.text_content():
            summary = " ".join(p.text_content().splitlines())
            summary = re.sub(r"\[\d+\]", "", summary)
            summary = formatting.truncate_str(summary, 200)
            return "{} :: {}".format(summary, url)

    # this shouldn't happen
    return "Unknown Error."
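# mcwiki() expects api_url to be a MediaWiki OpenSearch endpoint; the JSON
# response is an array whose second element is the list of matching titles,
# hence the j[1] indexing above. Assumed values (plausible Gamepedia endpoints,
# not verified from this excerpt):
api_url = "http://minecraft.gamepedia.com/api.php?action=opensearch"
mc_url = "http://minecraft.gamepedia.com/"
# Example response shape: ["creeper", ["Creeper", "Creeper/Video"], [...], [...]]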
def mlbScores(chan, text=" "):
    """mlb <team city> gets the score or next scheduled game for the specified
    team. If no team is specified all games will be included."""
    search_pages[chan] = []
    search_pages[chan + "index"] = 0
    response = http.get_html('http://scores.espn.go.com/mlb/bottomline/scores', decode=False)
    game = ""
    score = response.text_content()
    raw = score.replace('%20', ' ')
    raw = raw.replace('^', '')
    raw = raw.replace('&', '\n')
    pattern = re.compile(r"mlb_s_left\d+=(.*)")
    for match in re.findall(pattern, raw):
        if text.lower() in match.lower():
            game = game + match + " | "
    game = smart_truncate(game)
    game = game[:-2]
    game = two_lines(game, chan)
    if len(search_pages[chan]) > 1:
        return "{}(page {}/{}) .morescore".format(
            game, search_pages[chan + "index"] + 1, len(search_pages[chan]))
    return game
def metacritic(text):
    """[all|movie|tv|album|x360|ps3|pc|gba|ds|3ds|wii|vita|wiiu|xone|ps4] <title> -
    gets rating for <title> from metacritic on the specified medium"""
    args = text.strip()

    game_platforms = ('x360', 'ps3', 'pc', 'gba', 'ds', '3ds', 'wii',
                      'vita', 'wiiu', 'xone', 'ps4')
    all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')

    try:
        plat, title = args.split(' ', 1)
        if plat not in all_platforms:
            # raise the ValueError so that the except block catches it
            # in this case, or in the case of the .split above raising the
            # ValueError, we want the same thing to happen
            raise ValueError
    except ValueError:
        plat = 'all'
        title = args

    cat = 'game' if plat in game_platforms else plat

    title_safe = http.quote_plus(title)
    url = 'http://www.metacritic.com/search/{}/{}/results'.format(cat, title_safe)

    try:
        doc = http.get_html(url)
    except HTTPError:
        return 'error fetching results'

    # get the proper result element we want to pull data from
    result = None

    if not doc.find_class('query_results'):
        return 'No results found.'

    # if they specified an invalid search term, the input box will be empty
    if doc.get_element_by_id('search_term').value == '':
        return 'Invalid search term.'

    if plat not in game_platforms:
        # for [all] results, or non-game platforms, get the first result
        result = doc.find_class('result first_result')[0]

        # find the platform, if it exists
        result_type = result.find_class('result_type')
        if result_type:
            # if the result_type div has a platform div, get that one
            platform_div = result_type[0].find_class('platform')
            if platform_div:
                plat = platform_div[0].text_content().strip()
            else:
                # otherwise, use the result_type text_content
                plat = result_type[0].text_content().strip()
    else:
        # for games, we want to pull the first result with the correct
        # platform
        results = doc.find_class('result')
        for res in results:
            result_plat = res.find_class('platform')[0].text_content().strip()
            if result_plat == plat.upper():
                result = res
                break

    if not result:
        return 'No results found.'

    # get the name, release date, and score from the result
    product_title = result.find_class('product_title')[0]
    name = product_title.text_content()
    link = 'http://metacritic.com' + product_title.find('a').attrib['href']

    try:
        release = result.find_class('release_date')[0].find_class('data')[0].text_content()
        # strip extra spaces out of the release date
        release = re.sub(r'\s{2,}', ' ', release)
    except IndexError:
        release = None

    try:
        score = result.find_class('metascore_w')[0].text_content()
    except IndexError:
        score = None

    return '[{}] {} - \x02{}/100\x02, {} - {}'.format(
        plat.upper(), name, score or 'no score',
        'release: \x02%s\x02' % release if release else 'unreleased', link)
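# Hypothetical invocations (output shapes are illustrative, not captured runs):
#   metacritic("x360 halo 3") -> "[X360] Halo 3 - \x0294/100\x02, release: \x02Sep 25, 2007\x02 - http://metacritic.com/..."
#   metacritic("some movie")  -> platform falls back to 'all' because 'some' is not a recognized tag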