def get_beer(inp):
    """ search beeradvocate.com """
    search_url = "http://beeradvocate.com/search"
    base_url = "http://beeradvocate.com"
    try:
        xfToken = http.get_html(base_url).xpath(
            "//fieldset[@id='QuickSearch']/form[@class='formPopup']/input"
        )[0].value
    except IndexError:
        return "Unable to retrieve token."
    post_dict = {
        'q': inp,
        'qt': 'beer',
        '_xfToken': xfToken,
    }
    results = http.get_html(search_url, post_data=urlencode(post_dict))
    try:
        result = results.xpath(
            "//div[@id='content']/div[@class='pageWidth']/div[@class='pageContent']"
            "/div[@class='mainContainer']/div[@class='mainContent']/fieldset"
            "/div[@id='baContent']/div[2]/ul/li[1]"
        )[0]
    except IndexError:
        return "No Results"
    page_url = base_url + result.xpath('a')[0].get('href')
    scores = http.get_html(page_url).cssselect('.BAscore_big')
    beer_info = [x.text_content() for x in result.xpath('a')]
    return "%s by %s :: Community Score: %s :: Bros Score: %s :: %s" % (
        beer_info[0], beer_info[1], scores[0].text_content(),
        scores[1].text_content(), page_url)

def snopes(inp):
    ".snopes <topic> -- searches snopes for an urban legend about <topic>"
    search_page = http.get_html(search_url, sp_q=inp, sp_c="1")
    result_urls = search_page.xpath("//a[@target='_self']/@href")
    if not result_urls:
        return "no matching pages found"

    snopes_page = http.get_html(result_urls[0])
    snopes_text = snopes_page.text_content()

    claim = re.search(r"Claim: .*", snopes_text).group(0).strip()
    status = re.search(r"Status: .*", snopes_text)
    if status is not None:
        status = status.group(0).strip()
    else:  # new-style statuses
        status = "Status: %s." % re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED",
                                           snopes_text).group(0).title()

    claim = re.sub(r"[\s\xa0]+", " ", claim)  # compress whitespace
    status = re.sub(r"[\s\xa0]+", " ", status)
    return "%s %s %s" % (claim, status, result_urls[0])

def man(inp, say=''):
    """.man <command> [section] - Returns man page for specified command, section defaults to 1 if not specified."""
    raw = inp.split()
    command = raw[0]
    if len(raw) == 2 and raw[1].isdigit():
        page = raw[1]
    else:
        page = "1"
    try:
        manpage = str(http.get_html(base_url, topic=command, section=page))
        # If the requested section has no man page, retry across all sections
        if re.match(r'.+(\>No matches for ").+', manpage):
            page = "all"
            manpage = str(http.get_html(base_url, topic=command, section=page))
        # If an online man page exists for the command
        # (note: "and 1 == 2" is always False, so this branch is effectively
        # disabled and the system man page path below is always taken)
        if not re.match(r'.+(\>No matches for ").+', manpage) and 1 == 2:
            if page != "all":
                say("{} - {}({})".format(
                    web.try_googl(base_url.format(command, page)), command, page))
            else:
                say("{} - {}({}) (No section {})".format(
                    web.try_googl(base_url.format(command, page)), command, page, raw[1]))
        else:
            system_manpage = get_system_manpage(command)
            if system_manpage:
                haste_url = web.haste(system_manpage, ext='txt')
                googl_url = web.try_googl(haste_url)
                say("{} - {}".format(googl_url, command))
            else:
                return "There is no man page for {}.".format(command)
    except Exception as e:  # (http.HTTPError, http.URLError) as e:
        print(">>> u'HTTP Error: {}'".format(e))
        return "HTTP Error, please try again in a few minutes."

def snopes(inp):
    """snopes <topic> -- Searches snopes for an urban legend about <topic>."""
    search_page = http.get_html(search_url, sp_q=inp, sp_c="1")
    result_urls = search_page.xpath("//a[@target='_self']/@href")
    if not result_urls:
        return "no matching pages found"

    snopes_page = http.get_html(result_urls[0])
    snopes_text = snopes_page.text_content()

    claim = re.search(r"Claim: .*", snopes_text).group(0).strip()
    status = re.search(r"Status: .*", snopes_text)
    if status is not None:
        status = status.group(0).strip()
    else:  # new-style statuses
        status = "Status: %s." % re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED",
                                           snopes_text).group(0).title()

    claim = re.sub(r"[\s\xa0]+", " ", claim)  # compress whitespace
    status = re.sub(r"[\s\xa0]+", " ", status)
    return "{} {} {}".format(claim, status, result_urls[0])

def get_beer(inp):
    """ search beeradvocate.com """
    search_url = "http://beeradvocate.com/search"
    base_url = "http://beeradvocate.com"
    post_dict = {
        'q': inp,
        'qt': 'beer',
    }
    results = http.get_html(search_url, post_data=urlencode(post_dict))
    try:
        result = results.xpath(
            "//div[@id='content']/div[@class='pageWidth']/div[@class='pageContent']"
            "/div[@class='mainContainer']/div[@class='mainContent']/fieldset"
            "/div[@id='baContent']/div[2]/ul/li[1]")[0]
    except IndexError:
        return "No Results"
    page_url = base_url + result.xpath('a')[0].get('href')
    scores = http.get_html(page_url).cssselect('.BAscore_big')
    beer_info = [x.text_content() for x in result.xpath('a')]
    return "%s by %s :: Community Score: %s :: Bros Score: %s :: %s" % (
        beer_info[0], beer_info[1], scores[0].text_content(),
        scores[1].text_content(), page_url)

def mtg(inp):
    """.mtg <name> - Gets information about Magic the Gathering card <name>."""
    url = 'http://magiccards.info/query?v=card&s=cname'
    h = http.get_html(url, q=inp)

    name = h.find('body/table/tr/td/span/a')
    if name is None:
        return "no cards found"
    card = name.getparent().getparent().getparent()
    type = card.find('td/p').text.replace('\n', '')

    # this is ugly
    text = http.html.tostring(card.xpath("//p[@class='ctext']/b")[0])
    text = text.replace('<br>', '$')
    text = http.html.fromstring(text).text_content()
    text = re.sub(r'(\w+\s*)\$+(\s*\w+)', r'\1. \2', text)
    text = text.replace('$', ' ')
    text = re.sub(r'\(.*?\)', '', text)  # strip parenthetical explanations
    text = re.sub(r'\.(\S)', r'. \1', text)  # fix spacing

    name.make_links_absolute(base_url=url)
    link = name.attrib['href']
    name = name.text_content().strip()
    type = type.strip()
    text = ' '.join(text.split())

    return ' | '.join((" ".join(name.split()), " ".join(type.split()),
                       " ".join(text.split()), link))

def readtitle(match, say=None, nick=None):
    parsed_url = match.group().split(' ')[0]
    if any(word in parsed_url for word in skipurls):
        return
    try:
        request_url = http.get_html(parsed_url)
    except http.HTTPError as e:
        errors = {400: 'bad request (ratelimited?) 400',
                  401: 'unauthorized 401',
                  403: 'forbidden 403',
                  404: 'invalid user/id 404',
                  500: 'something is broken 500',
                  502: 'something is down ("getting upgraded?") 502',
                  503: 'something is overloaded 503',
                  410: 'something something 410'}
        if e.code == 404:
            return 'bad url?'
        if e.code in errors:
            return 'error: ' + errors[e.code]
        return 'error: unknown %s' % e.code
    try:
        titleget = request_url.xpath('//title/text()')[0]
        titleuni = " - " + unicode(titleget.strip())
    except IndexError:
        titleuni = ""
    shorturl = web.try_googl(parsed_url)
    say(shorturl + titleuni)

def get_rottentomatoes_data(movie_id):
    if movie_id.startswith('/m/'):
        movie_id = movie_id[3:]
    document = http.get_html(MOVIE_PAGE_URL % movie_id)
    # JSON-LD for the page is stored in the script tag with type
    # 'application/ld+json', so we can pull that for tons of information.
    ld_schema_element = document.xpath(
        "//script[@type='application/ld+json']")[0]
    ld_schema = json.loads(ld_schema_element.text_content())
    scripts = '\n'.join(document.xpath('//script/text()'))
    score_info = json.loads(re.search(
        r'scoreInfo = (.*);', scripts).group(1))['tomatometerAllCritics']
    try:
        audience_score = document.xpath(
            '//span[contains(@class, "audience") and contains(@class, "rating")]/text()'
        )[0].strip()
    except IndexError:
        audience_score = ''
    return {
        'title': ld_schema['name'],
        'critics_score': score_info['score'],
        'audience_score': audience_score,
        'fresh': score_info['freshCount'],
        'rotten': score_info['rottenCount'],
        'url': MOVIE_PAGE_URL % movie_id
    }

def ebay_url(match, bot):
    item = http.get_html(match)
    title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
    price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
    if not price:
        price = item.xpath("//span[@id='prcIsum']/text()")
    if not price:
        price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
    if price:
        price = price[0].strip()
    else:
        price = '?'
    try:
        bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
    except:
        bids = "Buy It Now"
    feedback = item.xpath("//span[@class='w2b-head']/text()")
    if not feedback:
        feedback = item.xpath("//div[@id='si-fb']/text()")
    if feedback:
        feedback = feedback[0].strip()
    else:
        feedback = '?'
    return http.process_text(
        "\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(
            title, price, bids, feedback))

def check_touhou(inp, chan=None, bot=None):
    #if channel[chan]:
    channels = bot.channelconfig.walk(gather_subsection)
    for channel in channels:
        print channel
    # NOTE: this early return leaves the rest of the function unreachable
    # (apparently a debugging state in the original).
    return
    chan_url = http.quote('{channel|%s}/1' % '#pantsumen')  # str(chan)
    url = 'http://booru.touhouradio.com/post/list/%s' % chan_url
    try:
        html = http.get_html(url)
    except ValueError:
        return None
    firstimage = html.xpath("//span[@class='thumb']//img/@src")[0]
    try:
        if firstimage in touhou_list[chan]:
            return "New Activity on TouhouRadio!"
    except:
        pass
    touhou_list[chan] = firstimage
    print touhou_list[chan]

def legal(inp):
    now = datetime.datetime.now()
    name = inp.replace(' ', '_')
    html = http.get_html('http://rottentomatoes.com/celebrity/%s/' % (name))
    date = html.xpath('//dl[@class="bottom_divider"]/dd/text()')[0]
    info = date.split(' ')
    month = info[0]
    birth_day = info[1].strip(",")
    birth_year = info[2]
    birth_month = months[month]
    birthdate = datetime.date(int(birth_year), int(birth_month), int(birth_day))
    age = now.year - int(birth_year)
    if age >= 18:
        return "legal - is %s" % (age)
    else:
        year_18 = int(birth_year) + 18
        # date on which the person turns 18, as a real date object so
        # timesince.timeuntil can compute the remaining time
        birthday_18 = datetime.date(year_18, int(birth_month), int(birth_day))
        return "%s will be 18 in %s" % (inp, timesince.timeuntil(birthday_18, now=now))

def urban(inp):
    '''.u/.urban <phrase> [#] -- looks up <phrase> with [#] definition on urbandictionary.com'''
    args = inp.split(" ")
    # Look for a number to cycle through definitions, optionally
    if len(args) > 1:
        try:
            int(args[-1])
            number = int(args.pop())
            index = number - 1
        except ValueError:
            index = 0
    else:
        index = 0
    args = " ".join(args)

    url = 'http://www.urbandictionary.com/define.php'
    page = http.get_html(url, term=args)
    words = page.xpath("//*[@id='entries']/div/span")
    defs = page.xpath("//div[@class='definition']")

    if not defs:
        return 'no definitions found'

    # Put together a string from the xpath requests.
    out = words[index].text.strip() + ': ' + ' '.join(defs[index].text.split())
    if len(out) > 400:
        out = out[:out.rfind(' ', 0, 400)] + '...'
    return out

def predb(inp):
    '.predb <query> -- searches scene releases using orlydb.com'
    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except HTTPError:
        return 'orlydb seems to be down'

    results = h.xpath("//div[@id='releases']/div/span[@class='release']/..")
    if not results:
        return "zero results"
    result = results[0]

    date, time = result.xpath("span[@class='timestamp']/text()")[0].split()
    section, = result.xpath("span[@class='section']//text()")
    name, = result.xpath("span[@class='release']/text()")

    size = result.xpath("span[@class='inforight']//text()")
    if size:
        size = ' :: ' + size[0].split()[0]
    else:
        size = ''

    return '%s - %s - %s%s' % (date, section, name, size)

def steamcalc(inp, nick='', chan='', say=None):
    '''Usage: '.steamcalc username'. Grabs the selected user's Steam account's monetary worth in USD.'''
    inpEncode = urllib.quote(inp)
    try:
        h = http.get_html("http://steamcalculator.com/id/%s" % inpEncode)
    except urllib2.HTTPError:
        return ("Hmm it looks like you entered an incorrect name. "
                "Be sure that it has no spaces or non ascii characters.")
    try:
        getAmountText = h.xpath('//div[@id="rightdetail"]/text()')[0]
    except IndexError:
        say("That user doesnt exist or something. F**k off.")
        return  # bail out here; otherwise the lookups below would fail again
    getAmountNum = h.xpath('//div[@id="rightdetail"]/h1/text()')[0]
    #getLastGame = h.xpath('//
    amountSplit = getAmountText.split(' ')
    amountGameNum = int(amountSplit[1])
    moneySplit = getAmountNum.split(' ')
    amountMonetary = moneySplit[0]
    valueStrip = amountMonetary.strip().lstrip("$")
    value = float(valueStrip)
    output = ("\x02%s\x0f owns \x02%i\x0f games on Steam. "
              "Their account is worth \x02$%.2f\x0f." % (inp, amountGameNum, value))
    if amountGameNum >= 125:
        output = output + " <--- jesus f**k quit buying games you neckbeard."
    return output

def forum_link(inp, bot=None):
    if 'sa_user' not in bot.config or \
       'sa_password' not in bot.config:
        return

    login(bot.config['sa_user'], bot.config['sa_password'])

    thread = http.get_html(showthread, threadid=inp.group(1), perpage='1',
                           cookies=True)

    breadcrumbs = thread.xpath('//div[@class="breadcrumbs"]//a/text()')
    if not breadcrumbs:
        return

    thread_title = breadcrumbs[-1]
    forum_title = forum_abbrevs.get(breadcrumbs[-2], breadcrumbs[-2])

    poster = thread.xpath('//dt[@class="author"]/text()')[0]

    # 1 post per page => n_pages = n_posts
    num_posts = thread.xpath('//a[@title="last page"]/@href')
    if not num_posts:
        num_posts = 1
    else:
        num_posts = int(num_posts[0].rsplit('=', 1)[1])

    return '\x02%s\x02 > \x02%s\x02 by \x02%s\x02, %s post%s' % (
        forum_title, thread_title, poster, num_posts,
        's' if num_posts > 1 else '')

def ebay_url(match, bot):
    apikey = bot.config.get("api_keys", {}).get("ebay")
    # if apikey:
    #     # ebay_item_re = (r'http:.+ebay.com/.+/(\d+).+', re.I)
    #     itemid = re.match('http:.+ebay.com/.+/(\d+).+', match, re.I)
    #     url = 'http://open.api.ebay.com/shopping?callname=GetSingleItem&responseencoding=JSON&appid={}&siteid=0&version=515&ItemID={}&IncludeSelector=Description,ItemSpecifics'.format(apikey, itemid.group(1))
    #     print url
    # else:
    print "No eBay api key set."
    item = http.get_html(match)
    title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
    price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
    if not price:
        price = item.xpath("//span[@id='prcIsum']/text()")
    if not price:
        price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
    if price:
        price = price[0].strip()
    else:
        price = '?'
    try:
        bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
    except:
        bids = "Buy It Now"
    feedback = item.xpath("//span[@class='w2b-head']/text()")
    if not feedback:
        feedback = item.xpath("//div[@id='si-fb']/text()")
    if feedback:
        feedback = feedback[0].strip()
    else:
        feedback = '?'
    return http.process_text(
        "\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(
            title, price, bids, feedback))

def time(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True

    if '@' in inp:
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not inp:
            if not location:
                notice(time.__doc__)
                return
        else:
            # " dontsave" suppresses saving the location, per the docstring
            if " dontsave" in inp:
                save = False
            location = inp.split()[0]

    # now, to get the actual time
    try:
        url = "https://www.google.com/search?q=time+in+%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[contains(@class,'vk_c vk_gy')]//span[@class='vk_gy vk_sh']/text()")[0].strip()
        curtime = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_bk vk_ans']/text()")[0].strip()
        day = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/text()")[0].strip()
        date = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/span/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."

    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)

    return u'{} is \x02{}\x02 [{} {}]'.format(prefix, curtime, day, date)

def define(text):
    """define <word> -- Fetches definition of <word>.

    :type text: str
    """
    url = 'http://ninjawords.com/'

    h = http.get_html(url + http.quote_plus(text))

    definition = h.xpath('//dd[@class="article"] | '
                         '//div[@class="definition"] |'
                         '//div[@class="example"]')

    if not definition:
        return 'No results for ' + text + ' :('

    result = format_output(h, definition, True)
    if len(result) > 450:
        result = format_output(h, definition, False)

        if len(result) > 450:
            result = result[:result.rfind(' ', 0, 450)]
            result = re.sub(r'[^A-Za-z]+\.?$', '', result) + ' ...'

    return result

def timefunction2(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True

    if '@' in inp:
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not inp:
            if not location:
                notice(timefunction2.__doc__)
                return
        else:
            # if not location: save = True
            if " dontsave" in inp:
                save = False
            location = inp.split()[0]

    # now, to get the actual time
    try:
        url = "https://time.is/%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[@id='msgdiv']/h1/a/text()")[0].strip()
        curtime = html.xpath("//div[contains(@id,'twd')]/text()")[0].strip()
        ampm = html.xpath("//div[contains(@id,'twd')]/span/text()")[0].strip()
        date = html.xpath("//h2[contains(@id,'dd')]/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."

    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)

    return u'Time in {} is \x02{} {}\x02 [{}]'.format(prefix, curtime, ampm.upper(), date)

def fixchewy(match, chan='', input=None):
    if match.group(1) is not None:
        if "Error" not in input.lastparam or "L: " in input.lastparam:
            return
        card = match.group(1)
    else:
        url = match.group(2)
        result = http.get_html(url)
        card = result.xpath('//title')[0].text.split("(")[0].strip()
    result = http.get_json("http://api.deckbrew.com/mtg/cards", name=card)
    if len(result) == 0:
        return
    for cards in result:
        if cards['name'].lower() == card.lower():
            card = cards
            break
    price = None  # guard against no priced edition being found
    for edition in card['editions']:
        if edition['set_id'][:1] != "p" and 'price' in edition:
            price = edition['price']
            break
    if price:
        prices = "L: $%s M: $%s H: $%s" % ('{:.2f}'.format(price['low'] / 100.),
                                           '{:.2f}'.format(price['median'] / 100.),
                                           '{:.2f}'.format(price['high'] / 100.))
        return "chewy's bot sucks here are prices: %s" % prices

def amazon(inp):
    """az [query] -- Searches amazon for query"""
    href = "http://www.amazon.com/s/url=search-alias%3Daps&field-keywords={}".format(inp.replace(" ", "%20"))
    results = http.get_html(href)
    # title = results.xpath('//title/text()')[0]
    try:
        title = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    except:
        title = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    azid = re.match(r'^.*\/dp\/([\w]+)\/.*', url).group(1)
    star_count = round(float(rating.split(' ')[0]), 0)
    stars = ""
    for x in xrange(0, int(star_count)):
        stars = "{}{}".format(stars, '★')
    for y in xrange(int(star_count), 5):
        stars = "{}{}".format(stars, '☆')
    return '\x02{}\x02 - {} - \x034{}\x02 - http://amzn.com/{}'.format(title, stars, price, azid).decode('utf-8')

def pre(inp):
    """pre <query> -- searches scene releases using orlydb.com"""

    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except http.HTTPError as e:
        return 'Unable to fetch results: {}'.format(e)

    results = h.xpath("//div[@id='releases']/div/span[@class='release']/..")
    if not results:
        return "No results found."

    result = results[0]

    date = result.xpath("span[@class='timestamp']/text()")[0]
    section = result.xpath("span[@class='section']//text()")[0]
    name = result.xpath("span[@class='release']/text()")[0]

    # parse date/time
    date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    date_string = date.strftime("%d %b %Y")
    since = timesince.timesince(date)

    size = result.xpath("span[@class='inforight']//text()")
    if size:
        size = ' - ' + size[0].split()[0]
    else:
        size = ''

    return '{} - {}{} - {} ({} ago)'.format(section, name, size, date_string, since)

def forum_link(inp, api_key=None):
    if api_key is None or 'user' not in api_key or 'password' not in api_key:
        return

    login(api_key['user'], api_key['password'])

    thread = http.get_html(showthread, threadid=inp.group(1), perpage='1',
                           cookies=True)

    breadcrumbs = thread.xpath('//div[@class="breadcrumbs"]//a/text()')
    if not breadcrumbs:
        return

    thread_title = breadcrumbs[-1]
    forum_title = forum_abbrevs.get(breadcrumbs[-2], breadcrumbs[-2])

    # "author" must be quoted so XPath treats it as a string literal,
    # not a child element name
    poster = thread.xpath('//dt[contains(@class, "author")]//text()')[0]

    # 1 post per page => n_pages = n_posts
    num_posts = thread.xpath('//a[@title="Last page"]/@href')
    if not num_posts:
        num_posts = 1
    else:
        num_posts = int(num_posts[0].rsplit('=', 1)[1])

    return '\x02%s\x02 > \x02%s\x02 by \x02%s\x02, %s post%s' % (
        forum_title, thread_title, poster, num_posts,
        's' if num_posts > 1 else '')

def rating(inp):
    if "scp-" not in inp:
        inp = "scp-" + inp
    print("Calling http.get_html() on http://www.scp-wiki.net/%s" % inp)
    # fetch the page once and pull the rating out of it
    page = http.get_html("http://www.scp-wiki.net/%s" % inp)
    rating = page.xpath("//*[@class='number prw54353']/text()")[0]
    return rating

def fuckmylife(inp, nick='', chan='', say=None):
    h = http.get_html("http://m.fmylife.com/random/")
    #else:
    #    h = http.get_html("http://iphone.fmylife.com/%s" % (inp))
    fmlContent = h.xpath('//p[@class="text"]/text()')[0]
    fmlID = h.xpath('//p[@class="infos"]/a/text()')[0]
    # only content and ID go into the output; fmlLink was never defined
    fmlOutput = "%s :: \x0307%s\x0F" % (fmlContent, fmlID)
    say(fmlOutput)

def redtube(inp, nick='', chan='', say=None):
    search = urllib.quote_plus(inp)
    searchURL = "http://redtube.com/?search=%s" % (search)
    getSearch = http.get_html(searchURL)
    # the original xpath ended with a stray '/', which is invalid XPath;
    # text() is appended here to select the link text
    videoTitle = getSearch.xpath('/html/body/div/div/div[3]/ul/li/div[2]/h2/a/text()')[0]
    #videoUrl = getSearch.xpath('//div[@class="video"]/a/href/text()')[0]
    #final = "%s :: %s" % (videoTitle, videoUrl)
    say(videoTitle)

def debt(inp):
    """debt -- returns the us national debt"""
    href = "http://www.nationaldebtclocks.org/debtclock/unitedstates"
    results = http.get_html(href)
    debt = results.xpath("//span[@id='debtDisplayFast']/text()")[0]
    householdshare = results.xpath("//span[@id='SCS']/text()")[0]
    return ("Current US Debt: \x02${:,}\x02 - Amount Per Citizen: \x02{}\x02"
            .format(int(debt), householdshare))

def getfic(inp):
    input = inp.split(" ")[0]
    url = ("http://www.fanfiction.net/search.php?type=story&plus_keywords=%s"
           "&match=any&minus_keywords=&sort=0&genreid=0&subgenreid=0"
           "&characterid=0&subcharacterid=0&words=0&ready=1&categoryid=0" % input)
    f = http.get_html(url)
    author = f.xpath('//div[@class="z-list"]/a/text()')[0]
    title = f.xpath('//div[@class="z-list"]/a/b/text()')[0]
    content = f.xpath('//div[@class="z-list"]/div[@class="z-indent z-padtop"]/text()')[0]
    return title + " by " + author + " :: " + content + inp

def define(inp):
    """.define/.dict <word> - fetches definition of <word>."""
    url = 'http://ninjawords.com/'
    try:
        h = http.get_html(url + http.quote_plus(inp))
    except:
        return "API error; please try again in a few minutes."

    definition = h.xpath('//dd[@class="article"] | '
                         '//div[@class="definition"] |'
                         '//div[@class="example"]')

    if not definition:
        return 'No results for ' + inp

    def format_output(show_examples):
        result = '%s: ' % h.xpath('//dt[@class="title-word"]/a/text()')[0]

        correction = h.xpath('//span[@class="correct-word"]/text()')
        if correction:
            result = 'definition for "%s": ' % correction[0]

        sections = []
        for section in definition:
            if section.attrib['class'] == 'article':
                sections += [[section.text_content() + ': ']]
            elif section.attrib['class'] == 'example':
                if show_examples:
                    sections[-1][-1] += ' ' + section.text_content()
            else:
                sections[-1] += [section.text_content()]

        for article in sections:
            result += article[0]
            if len(article) > 2:
                result += ' '.join('%d. %s' % (n + 1, section)
                                   for n, section in enumerate(article[1:]))
            else:
                result += article[1] + ' '

        synonyms = h.xpath('//dd[@class="synonyms"]')
        if synonyms:
            result += synonyms[0].text_content()

        result = re.sub(r'\s+', ' ', result)
        result = re.sub('\xb0', '', result)
        return result

    result = format_output(True)
    if len(result) > 450:
        result = format_output(False)

        if len(result) > 450:
            result = result[:result.rfind(' ', 0, 450)]
            result = re.sub(r'[^A-Za-z]+\.?$', '', result) + ' ...'

    return result

def define(inp):
    """define <word> -- Fetches definition of <word>."""
    url = 'http://ninjawords.com/'

    h = http.get_html(url + http.quote_plus(inp))

    definition = h.xpath('//dd[@class="article"] | '
                         '//div[@class="definition"] |'
                         '//div[@class="example"]')

    if not definition:
        return 'No results for ' + inp + ' :('

    def format_output(show_examples):
        result = '{}: '.format(h.xpath('//dt[@class="title-word"]/a/text()')[0])

        correction = h.xpath('//span[@class="correct-word"]/text()')
        if correction:
            result = 'Definition for "{}": '.format(correction[0])

        sections = []
        for section in definition:
            if section.attrib['class'] == 'article':
                sections += [[section.text_content() + ': ']]
            elif section.attrib['class'] == 'example':
                if show_examples:
                    sections[-1][-1] += ' ' + section.text_content()
            else:
                sections[-1] += [section.text_content()]

        for article in sections:
            result += article[0]
            if len(article) > 2:
                result += u' '.join(u'{}. {}'.format(n + 1, section)
                                    for n, section in enumerate(article[1:]))
            else:
                result += article[1] + ' '

        synonyms = h.xpath('//dd[@class="synonyms"]')
        if synonyms:
            result += synonyms[0].text_content()

        result = re.sub(r'\s+', ' ', result)
        result = re.sub('\xb0', '', result)
        return result

    result = format_output(True)
    if len(result) > 450:
        result = format_output(False)

        if len(result) > 450:
            result = result[:result.rfind(' ', 0, 450)]
            result = re.sub(r'[^A-Za-z]+\.?$', '', result) + ' ...'

    return result

def manga(inp):
    "manga <query> - batoto Search"
    search_url = 'http://www.batoto.net/search?name=%s&name_cond=c&dosubmit=Search' % (inp.replace(' ', '+'))
    results = http.get_html(search_url)
    try:
        result = results.xpath("//tbody//strong/a/@href")[0]
        return u'%s' % (result)
    except IndexError:
        return u'No matches found.'

def get_inmate(self, id_num):
    inmate = {}
    url = self.inmate_url + str(id_num)
    h = http.get_html(url)
    try:
        name = h.xpath("//td/font/text()")[0].strip(' ').split(',')
        inmate['first_name'] = name[1].strip(' ')
        inmate['last_name'] = name[0].strip(' ')
        inmate['age'] = h.xpath(
            "//td[text()='Age at Booking:']/following-sibling::td")[0].text
        inmate['race'] = h.xpath(
            "//td[text()='Race:']/following-sibling::td")[0].text
        inmate['sex'] = h.xpath(
            "//td[text()='Sex:']/following-sibling::td")[0].text
        inmate['eyes'] = h.xpath(
            "//td[text()='Eyes:']/following-sibling::td")[0].text
        inmate['hair'] = h.xpath(
            "//td[text()='Hair:']/following-sibling::td")[0].text
        inmate['height'] = h.xpath(
            "//td[text()='Height:']/following-sibling::td")[0].text
        inmate['weight'] = h.xpath(
            "//td[text()='Weight:']/following-sibling::td")[0].text
        inmate['booking_date'] = h.xpath(
            "//td[text()='Booking Date:']/following-sibling::td")[0].text
        inmate['booking_time'] = h.xpath(
            "//td[text()='Booking Time:']/following-sibling::td")[0].text
        inmate['url'] = self.url_short(url)
        inmate['charge'] = removeNonAscii(
            h.xpath("//td/font/text()")[4]).strip()

        r = h.xpath("//td[text()='Race:']/following-sibling::td")[0].text
        if r == 'H':
            race = 'Hispanic'
        elif r == 'A':
            race = 'Asian'
        elif r == 'B':
            race = 'Black'
        else:
            race = 'White'
        inmate['race'] = race

        sex = h.xpath("//td[text()='Sex:']/following-sibling::td")[0].text
        if sex == 'F':
            inmate['sex'] = 'Female'
        else:
            inmate['sex'] = 'Male'

        return inmate
    except IndexError:
        return 'inmate does not exist'

def reddit_url(match):
    thread = http.get_html(match.group(0))

    title = thread.xpath('//title/text()')[0]
    author = thread.xpath("//div[@id='siteTable']//a[contains(@class,'author')]/text()")[0]
    timeago = thread.xpath("//div[@id='siteTable']//p[@class='tagline']/time/text()")[0]
    comments = thread.xpath("//div[@id='siteTable']//a[@class='comments']/text()")[0]

    return u'\x02{}\x02 - posted by \x02{}\x02 {} ago - {}'.format(
        title, author, timeago, comments)

def spotify_parse_uri(inp, say=None):
    url = "http://open.spotify.com/track/%s" % inp.group(1)
    response = http.get_html(url)
    title_parse = response.xpath("//h1[@itemprop='name']")
    artist_parse = response.xpath("//h2/a")
    title = title_parse[0].text_content()
    artist = artist_parse[0].text_content()
    say("Spotify: %s - %s" % (artist, title))

def got(inp):
    try:
        doc = http.get_html(url_tumblr)
    except HTTPError:
        return 'error fetching results'
    url_picture = doc.find_class('photo')[0]
    url_raw = url_picture.xpath("/html/body/div[@id='wrapper']/div[@id='main']/div[@id='container']/div[@id='content']/div/div[@class='post-content']/a[1]/img[@class='photo']/@src")
    url_regex = re.search("[^\'\[].+[^\'\]]", str(url_raw))
    return url_regex.group(0)

def spotify_parse(inp, say=None):
    url = inp.group(0)
    response = http.get_html(url)
    title_parse = response.xpath("//h1[@itemprop='name']")
    artist_parse = response.xpath("//h2/a")
    title = title_parse[0].text_content()
    artist = artist_parse[0].text_content()
    say("Spotify: %s - %s" % (artist, title))

def manga(inp):
    "manga <query> - batoto Search"
    search_url = 'http://www.batoto.net/search?name=%s&name_cond=c&dosubmit=Search' % (
        inp.replace(' ', '+'))
    results = http.get_html(search_url)
    try:
        result = results.xpath("//tbody//a/@href")[0]
        return u'%s' % (result)
    except IndexError:
        return u'No matches found.'

def get_beer(inp):
    """ search beeradvocate.com """
    search_url = "http://beeradvocate.com/search?q=%s"
    base_url = "http://beeradvocate.com"

    results = http.get_html(search_url % http.quote_plus(inp))
    try:
        result = results.xpath("//td[@id='mainContent']/div[2]/ul/li[1]")[0]
    except IndexError:
        return "No Results"

    page_url = base_url + result.xpath('a')[0].get('href')
    scores = http.get_html(page_url).cssselect('.BAscore_big')
    beer_info = [x.text_content() for x in result.xpath('a')]

    return "%s by %s :: Community Score: %s :: Bros Score: %s :: %s" % (
        beer_info[0], beer_info[1], scores[0].text_content(),
        scores[1].text_content(), page_url)

def koran(inp):  # Koran look-up plugin by Ghetto Wizard
    ".koran <chapter.verse> -- gets <chapter.verse> from the Koran"
    url = 'http://quod.lib.umich.edu/cgi/k/koran/koran-idx?type=simple'

    results = http.get_html(url, q1=inp).xpath('//li')

    if not results:
        return 'No results for ' + inp

    return results[0].text_content()

def wechall(inp):
    ".wechall <username> -- poll user statistics."
    main_url = 'https://www.wechall.net/wechall.php?'
    cmds = inp.split(' ', 2)[1:]
    query_url = ''
    if len(cmds) == 2:
        # NOTE: part of this expression was redacted ("******") in the source;
        # "'username=' + cmds[0]" is a reconstruction of the likely original.
        query_url = main_url + 'username=' + cmds[0] + ' ' + cmds[1]
    elif len(cmds) == 1:
        query_url = main_url + 'username=' + cmds[0]
    h = http.get_html(query_url)
    return h

def reddit_url(match):
    # match.group(0)
    thread = http.get_html(match)

    title = thread.xpath('//title/text()')[0]
    upvotes = thread.xpath("//span[@class='upvotes']/span[@class='number']/text()")[0]
    downvotes = thread.xpath("//span[@class='downvotes']/span[@class='number']/text()")[0]
    author = thread.xpath("//div[@id='siteTable']//a[contains(@class,'author')]/text()")[0]
    timeago = thread.xpath("//div[@id='siteTable']//p[@class='tagline']/time/text()")[0]
    comments = thread.xpath("//div[@id='siteTable']//a[@class='comments']/text()")[0]

    return '\x02%s\x02 - posted by \x02%s\x02 %s ago - %s upvotes, %s downvotes - %s' % (
        title, author, timeago, upvotes, downvotes, comments)

def calc(inp):
    """.calc <term> -- returns Google Calculator result"""
    h = http.get_html("http://www.google.com/search", q=inp)
    m = h.xpath('//h2[@class="r"]/text()')
    if not m:
        return "could not calculate " + inp
    res = " ".join(m[0].split())
    return res

def get_wiki_article(inp):
    # using scraping instead of the wikidot api because it sucks:
    # it doesn't have a text search, though the site might just be using
    # the api method to select tags anyway
    results = http.get_html(search_url % http.quote_plus(inp))
    try:
        result = results.xpath(result_xpath)[0]
        page_url = result.values()[0]
    except IndexError:
        return "No Results"
    title = result.text_content()
    return "%s -- %s" % (title, page_url)

def wolframalpha(inp):
    ".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's" \
        " results for <query>"
    #return "derp"
    url = "http://www.wolframalpha.com/input/?asynchronous=false"

    h = http.get_html(url, i=inp)

    pods = h.xpath("//div[@class='pod ']")

    pod_texts = []
    for pod in pods:
        heading = pod.find('h2')
        if heading is not None:
            heading = heading.text_content().strip()
            if heading.startswith('Input'):
                continue
        else:
            continue

        results = []
        for alt in pod.xpath('div/div[@class="output pnt"]/img/@alt'):
            alt = alt.strip().replace('\\n', '; ')
            alt = re.sub(r'\s+', ' ', alt)
            if alt:
                results.append(alt)
        if results:
            pod_texts.append(heading + ' ' + '|'.join(results))

    ret = '. '.join(pod_texts)

    if not pod_texts:
        return 'no results'

    ret = re.sub(r'\\(.)', r'\1', ret)

    def unicode_sub(match):
        return unichr(int(match.group(1), 16))

    ret = re.sub(r'\\:([0-9a-z]{4})', unicode_sub, ret)

    leng = 175
    if len(ret) > leng:
        ret = ret[:ret.rfind(' ', 0, leng)]
        ret = re.sub(r'\W+$', '', ret) + '...'

    if not ret:
        return 'no result'

    return ret

def grouphug(inp, nick='', chan='', say=None):
    if inp == '':
        #h = http.get_html("http://grouphug.us/%s" % str(uuid.uuid1()))
        return "http://grouphug.us/confessions/%s" % str(random_digits(10))
    else:
        h = http.get_html("http://grouphug.us/confessions/%s" % inp)
    hugID = h.xpath('//h2[@class="title"]/a/text()')[1]
    hugContent = removeNonAscii(h.xpath('//div[@class="content"]/p/text()')[1])
    if len(hugContent) > 350:
        hugContent = hugContent[:350] + "..."
    hugURL = "http://grouphug.us/confessions/%s" % (hugID)
    hugLink = api.shorten(hugURL)
    hugOutput = "%s :: \x0307%s\x0F :: \x0308%s\x0F" % (hugContent, hugID, hugLink)
    say(hugOutput)

def search(query):
    term = urllib.quote(query)
    #return term
    try:
        html = http.get_html('http://rule34.paheal.net/post/list/%s/1' % (term))
    except:
        return "The site timed out. It's pretty shitty. Try again in a few seconds."
    image_link = html.xpath("//div[@id='Imagesmain']/div[@class='thumbblock']/div[@class='rr thumb']/div[@class='rrcontent']/a[2]/@href")
    image_link = choice(image_link).split('http://')[1]
    image_safe = urllib.quote(image_link)
    image = "http://" + image_safe
    imgur = imgur_get(image)
    return imgur

def timefunction(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True

    if '@' in inp:
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(
                nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not inp:
            if not location:
                notice(timefunction.__doc__)
                return
        else:
            # if not location: save = True
            if " dontsave" in inp:
                save = False
            location = inp.split()[0]

    # now, to get the actual time
    try:
        url = "https://www.google.com/search?q=time+in+%s" % location.replace(
            ' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath(
            "//div[contains(@class,'vk_c vk_gy')]//span[@class='vk_gy vk_sh']/text()"
        )[0].strip()
        curtime = html.xpath(
            "//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_bk vk_ans']/text()"
        )[0].strip()
        day = html.xpath(
            "//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/text()"
        )[0].strip()
        date = html.xpath(
            "//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/span/text()"
        )[0].strip()
    except IndexError:
        return "Could not get time for that location."

    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)

    return formatting.output(
        'Time', [u'{} is \x02{}\x02 [{} {}]'.format(prefix, curtime, day, date)])
