def fml(inp):
    ".fml [id] -- Gets a random quote from fmylife.com. Optionally gets [id]."
    inp = inp.replace("#", "")
    if inp:
        if not inp.isdigit():
            return "Invalid ID!"
        try:
            page = http.get(urljoin(base_url, inp))
        except (HTTPError, IOError):
            return "Could not fetch #%s. FML" % inp
    else:
        try:
            page = http.get(urljoin(base_url, 'random'))
        except (HTTPError, IOError):
            return "I tried to use .fml, but it was broken. FML"

    soup = BeautifulSoup(page)
    soup.find('div', id='submit').extract()

    post = soup.body.find('div', 'post')
    try:
        id = int(post.find('a', 'fmllink')['href'].split('/')[-1])
    except TypeError:
        return "Could not fetch #%s. FML" % inp
    body = misc.strip_html(' '.join(link.renderContents() for link in post('a', 'fmllink')))
    return '(#%d) %s' % (id, body)

def login(user, password):
    """
    Authenticate against SomethingAwful, both storing that authentication in
    the global cookiejar and returning the relevant cookies

    :param user: your awful username for somethingawful dot com
    :param password: your awful password for somethingawful dot com
    :return: the authentication cookies for somethingawful dot com
    """
    get_sa_cookies = lambda jar: [
        c for c in jar
        if c.domain.endswith("forums.somethingawful.com")
        and (c.name == "bbuserid" or c.name == "bbpassword")
    ]

    http.clear_expired_cookies()
    sa_cookies = get_sa_cookies(http.get_cookie_jar())

    if len(sa_cookies) == 2:
        return sa_cookies

    post_data = {"action": "login", "username": user, "password": password}
    http.get(LOGIN_URL, cookies=True, post_data=post_data)

    sa_cookies = get_sa_cookies(http.get_cookie_jar())
    if len(sa_cookies) < 2:
        return None

    return sa_cookies

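# A minimal usage sketch for the login() helper above, assuming a CloudBot-style
# bot object whose config holds the credentials. The "somethingawful" config keys
# and ensure_sa_session() name are illustrative assumptions, not part of the
# original plugin.
def ensure_sa_session(bot):
    credentials = bot.config.get("somethingawful", {})
    cookies = login(credentials.get("username"), credentials.get("password"))
    if cookies is None:
        raise RuntimeError("SomethingAwful login failed; check the configured credentials")
    return cookies
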
def login(user, password):
    http.jar.clear_expired_cookies()
    if any(cookie.domain == 'forums.somethingawful.com' and
           cookie.name == 'bbuserid' for cookie in http.jar):
        if any(cookie.domain == 'forums.somethingawful.com' and
               cookie.name == 'bbpassword' for cookie in http.jar):
            return
        # a bare assert on a non-empty string always passes; fail explicitly instead
        assert False, "malformed cookie jar"
    http.get("http://forums.somethingawful.com/account.php", cookies=True,
             post_data="action=login&username=%s&password=%s" % (user, password))

def down(inp):
    """down <url> -- Checks if the site at <url> is up or down."""
    if 'http://' not in inp:
        inp = 'http://' + inp
    inp = 'http://' + urlparse.urlparse(inp).netloc
    # http://mail.python.org/pipermail/python-list/2006-December/589854.html
    try:
        http.get(inp, get_method='HEAD')
        return '{} seems to be up'.format(inp)
    except http.URLError:
        return '{} seems to be down'.format(inp)

def down(inp):
    '''.down <url> -- checks to see if the site is down'''
    if 'http://' not in inp:
        inp = 'http://' + inp
    inp = 'http://' + urlparse.urlparse(inp).netloc
    # http://mail.python.org/pipermail/python-list/2006-December/589854.html
    try:
        http.get(inp, get_method='HEAD')
        return inp + ' seems to be up'
    except http.URLError:
        return inp + ' seems to be down'

def down(inp):
    "down <url> -- Checks if the site at <url> is up or down."
    if 'http://' not in inp:
        inp = 'http://' + inp
    inp = 'http://' + urlparse.urlparse(inp).netloc
    # http://mail.python.org/pipermail/python-list/2006-December/589854.html
    try:
        http.get(inp, get_method='HEAD')
        return inp + ' seems to be up'
    except http.URLError:
        return inp + ' seems to be down'

def down(inp):
    """down <url> -- Checks if the site at <url> is up or down.

    :type inp: str
    """
    if not inp.startswith("http://"):
        inp = 'http://' + inp
    inp = 'http://' + urllib.parse.urlparse(inp).netloc
    try:
        http.get(inp, get_method='HEAD')
        return '{} seems to be up'.format(inp)
    except http.URLError:
        return '{} seems to be down'.format(inp)

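# The down() variants above all boil down to one idea: issue an HTTP HEAD request
# and treat any URL error as "down". A standalone sketch of that idea using only
# the Python 3 standard library (no bot http wrapper) might look like this; the
# check_up() name and the 5-second timeout are illustrative assumptions.
import urllib.error
import urllib.parse
import urllib.request


def check_up(url, timeout=5):
    # prepend a scheme when the caller passed a bare host like "example.com"
    if not urllib.parse.urlparse(url).scheme:
        url = 'http://' + url
    request = urllib.request.Request(url, method='HEAD')
    try:
        # any response at all (even a redirect) counts as "up"
        urllib.request.urlopen(request, timeout=timeout)
        return '{} seems to be up'.format(url)
    except urllib.error.URLError:
        # covers DNS failures, refused connections and HTTP-level errors
        return '{} seems to be down'.format(url)
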
def parse_standings(db=None):
    url = "http://www.nascar.com/en_us/sprint-cup-series/drivers.html"
    try:
        page = http.get(url)
    except HTTPError:
        print "Can't get standings."
        return ""
    soup = BeautifulSoup(page)
    drivers = soup.find_all('article', class_='driverCard')
    for driver in drivers:
        data = {
            'first_name': '',
            'last_name': '',
            'driver_no': -1,
            'rank': -1,
            'points': -1
        }
        if 'data-first-name' in driver.attrs:
            data['first_name'] = driver.attrs['data-first-name']
        if 'data-last-name' in driver.attrs:
            data['last_name'] = driver.attrs['data-last-name']
        if 'data-rank' in driver.attrs:
            data['rank'] = int(driver.attrs['data-rank'].replace('--', '-1'))
        if 'data-number' in driver.attrs:
            data['driver_no'] = driver.attrs['data-number']
            if data['driver_no'] == '':
                data['driver_no'] = -1
            else:
                data['driver_no'] = int(data['driver_no'])
        data['points'] = int(driver.find('dl', class_='points').find('dd').find(text=True).replace('--', '-1'))
        upsert_standings(db, data)

def parse_json(json_url, db=None):
    # check to see if it's a 404 page
    try:
        page = http.get(json_url)
    except HTTPError:
        print "Can't get live stats."
        return ""
    page_matches = re.search(r'404 Not Found', page)
    if page_matches is not None and page_matches.group(0):
        return False

    js = json.loads(page)

    raceinfo = js
    race_id = raceinfo['RaceID']
    print "HERE IS THE RACE_ID => {}".format(race_id)
    nascar['current_race_id'] = race_id
    raceinfo = clean_raceinfo(raceinfo)
    upsert_raceinfo(db, raceinfo)

    previous_leader = get_current_leader(race_id, db)

    passings = json.loads(page)
    passings = passings['Passings']
    for driver in passings:
        driver = clean_racestats(driver)
        upsert_racestats(race_id, db, driver)

    current_leader = get_current_leader(race_id, db)
    if current_leader != previous_leader:
        messages.append("\x02[NEW LEADER]\x02 #{} {}".format(str(current_leader[0]), current_leader[1]))

    print "parsed json"

def convert(inp, say=None):
    m = reg.findall(inp)
    v1, c1, c2 = m[0]
    j = http.get('http://www.google.com/ig/calculator?hl=en&q={0}{1}=?{2}'.format(v1, c1, c2))
    g = greg.findall(j.decode('utf-8', errors='ignore'))
    if j:
        return '{0} = {1}'.format(*g)

def get_zipped_xml(*args, **kwargs):
    try:
        path = kwargs.pop("path")
    except KeyError:
        raise KeyError("must specify a path for the zipped file to be read")
    zip_buffer = StringIO(http.get(*args, **kwargs))
    return etree.parse(ZipFile(zip_buffer, "r").open(path))

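# get_zipped_xml() above leans on the bot's http wrapper and the Python 2
# StringIO/ZipFile pair. A hedged, self-contained Python 3 sketch of the same
# pattern (download a zip, open one member, parse it as XML) could look like
# this; fetch_zipped_xml() is an illustrative name, not part of the plugin's API.
import io
import urllib.request
import xml.etree.ElementTree as etree
import zipfile


def fetch_zipped_xml(url, path):
    raw = urllib.request.urlopen(url).read()
    with zipfile.ZipFile(io.BytesIO(raw), "r") as archive:
        with archive.open(path) as member:
            # etree.parse reads the member fully before the archive is closed
            return etree.parse(member)
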
def show_title(match, nick='', chan='', say=None):
    matched = match.group().encode('utf-8')
    url = urlnorm.normalize(matched)
    host = Request(url).get_host()

    if nick not in ignore:
        page, response = http.get_html_and_response(url)
        message = ''

        if host not in ignore_hosts:
            parser = BeautifulSoup(response)
            title = parser.title.string.strip()
            if title:
                message = 'URL title: %s' % (title)

        # Shorten URLs that are over 80 characters.
        if len(url) >= 80:
            short_url = http.get(
                'http://is.gd/create.php',
                query_params={'format': 'simple', 'url': matched}
            )
            # Cheap error checking
            if 'error: please' not in short_url.lower():
                if message:
                    message += ' | Short URL: %s'
                else:
                    message = 'Short URL: %s'
                message = message % (short_url)

        if message:
            say(message)

def down(inp):
    '''.down <url> -- checks to see if the website is down'''
    urlp = urlparse.urlparse(inp, 'http')
    if urlp.scheme not in ('http', 'https'):
        return inp + " is not a valid HTTP URL"

    inp = "%s://%s" % (urlp.scheme, urlp.netloc)
    # http://mail.python.org/pipermail/python-list/2006-December/589854.html
    try:
        http.get(inp, get_method='HEAD')
        return inp + ' seems to be up'
    except http.URLError:
        return inp + ' seems to be down'

def randombukkitplugin(inp, reply=None):
    if not path.exists("plugins/data/bukgetplugins"):
        with open("plugins/data/bukgetplugins", "w") as f:
            f.write(http.get("http://api.bukget.org/3/plugins/bukkit"))
    jsahn = json.loads(open("plugins/data/bukgetplugins", "r").read())
    pickslug = random.choice(jsahn)['slug']
    data = getplugininfo(pickslug)
    name = data['plugin_name']
    description = data['description']
    url = data['website']
    authors = data['authors'][0]
    authors = authors[0] + u"\u200b" + authors[1:]
    stage = data['stage']
    lastUpdate = time.strftime('%d %B %Y %H:%M', time.gmtime(data['versions'][0]['date']))
    lastVersion = data['versions'][0]['version']
    bukkitver = ", ".join(data['versions'][0]['game_versions'])
    link = web.isgd(data['versions'][0]['link'])
    if description != "":
        reply("\x02{}\x02, by \x02{}\x02 - {} - ({}) \x02{}".format(
            name, authors, description, stage, url))
    else:
        reply("\x02{}\x02, by \x02{}\x02 ({}) \x02{}".format(
            name, authors, stage, url))
    reply("Last release: \x02v{}\x02 for \x02{}\x02 at {} \x02{}\x02".format(
        lastVersion, bukkitver, lastUpdate, link))

def _search(inp, say):
    url = "http://duckduckgo.com/lite?" + urllib.urlencode({"q": inp.encode('utf8', 'ignore')})
    try:
        data = http.get(url)
    except http.HTTPError, e:
        say(str(e) + ": " + url)
        return

def clock(inp, say=None):
    '''.time <area> -- gets the time in <area>'''

    white_re = re.compile(r'\s+')
    tags_re = re.compile(r'<[^<]*?>')

    page = http.get('http://www.google.com/search', q="time in " + inp)

    soup = BeautifulSoup(page)

    response = soup.find('td', {'style': 'font-size:medium'})
    if response is None:
        return "Could not get the time for " + inp + "!"

    output = response.renderContents()
    output = ' '.join(output.splitlines())
    output = output.replace("\xa0", ",")
    output = white_re.sub(' ', output.strip())
    output = tags_re.sub('\x02', output.strip())
    output = output.decode('utf-8', 'ignore')

    return output

def get_profile(name):
    profile = {}

    # form the profile request
    request = {"name": name, "agent": "minecraft"}

    # submit the profile request
    try:
        headers = {"Content-Type": "application/json"}
        r = http.get_json('https://api.mojang.com/profiles/page/1',
                          post_data=json.dumps(request).encode('utf-8'),
                          headers=headers)
    except (http.URLError, http.HTTPError) as e:
        raise McuError("Could not get profile status: {}".format(e))

    user = r["profiles"][0]
    profile["name"] = user["name"]
    profile["id"] = user["id"]

    profile["legacy"] = user.get("legacy", False)

    try:
        response = http.get(PAID_URL, user=name)
    except (http.URLError, http.HTTPError) as e:
        raise McuError("Could not get payment status: {}".format(e))

    if "true" in response:
        profile["paid"] = True
    else:
        profile["paid"] = False

    return profile

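# A short usage sketch for get_profile() above; describe_profile() and the
# output format are illustrative assumptions, while McuError is the exception
# type the helper itself raises.
def describe_profile(name):
    try:
        profile = get_profile(name)
    except McuError as e:
        return str(e)
    return "{} ({}) - paid account: {}".format(
        profile["name"], profile["id"], profile["paid"])
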
def suggest(inp, inp_unstripped=''):
    ".suggest [#n] <phrase> -- gets a random/the nth suggested google search"
    inp = inp_unstripped
    m = re.match('^#(\d+) (.+)$', inp)
    if m:
        num, inp = m.groups()
        num = int(num)
        if num > 10:
            return 'can only get first ten suggestions'
    else:
        num = 0

    page = http.get('http://google.com/complete/search', output='json', client='hp', q=inp)
    page_json = page.split('(', 1)[1][:-1]
    suggestions = json.loads(page_json)[1]
    if not suggestions:
        return 'no suggestions found'
    if num:
        if len(suggestions) + 1 <= num:
            return 'only got %d suggestions' % len(suggestions)
        out = suggestions[num - 1]
    else:
        out = random.choice(suggestions)
    return '#%d: %s' % (int(out[2][0]) + 1, out[0].replace('<b>', '').replace('</b>', ''))

def zeroclick(inp, say=None, input=None):
    "zeroclick/0click <search> -- gets zero-click info from DuckDuckGo"
    if inp.group(2) != "":
        if inp.group(2).lower() == "what is love":
            return "http://youtu.be/xhrBDcQq2DM"
        url = "http://duckduckgo.com/lite?"
        params = {"q": inp.group(2).replace("\001", "").encode('utf8', 'ignore')}
        url = "http://duckduckgo.com/lite/?" + urllib.urlencode(params)
        try:
            data = http.get(url).decode("utf-8", "ignore")
        except http.HTTPError, e:
            say(str(e) + ": " + url)
            return
        #search = re.findall("""\t<td>.\t\s+(.*?).\t<\/td>""", data, re.M | re.DOTALL)
        m = re.findall("\t<td>.\t\s+(.*?).\t<\/td>", data, re.M | re.DOTALL)
        if len(m) == 1:
            search = re.sub("\s+", " ", re.sub('<[^<]+?>', ' ', m[0]))
        else:
            search = None
        if search:
            out = HTMLParser.HTMLParser().unescape(search.replace("<br>", " ").replace("<code>", "\002").replace("</code>", "\002"))
            if out:
                say(u"\x0302\x02ǁ\x02\x03 {}".format(out.split(" [ More at")[0].split("}")[-1].strip()))
            else:
                say(u"\x0302\x02ǁ\x02\x03 No results")
        else:
            say(u"\x0302\x02ǁ\x02\x03 No results found.")

def factoid(inp, message=None, db=None, bot=None, action=None, conn=None, input=None):
    """?<word> -- Shows what data is associated with <word>."""
    try:
        prefix_on = bot.config["plugins"]["factoids"].get("prefix", False)
    except KeyError:
        prefix_on = False

    db_init(db, conn)

    # split up the input
    split = inp.group(1).strip().split(" ")
    factoid_id = split[0]

    if len(split) >= 1:
        arguments = u" ".join(split[1:])
    else:
        arguments = ""

    data = get_memory(db, factoid_id)

    if data:
        # factoid preprocessors
        if data.startswith("<py>"):
            code = data[4:].strip()
            variables = u'input="""{}"""; nick="{}"; chan="{}"; bot_nick="{}"; '.format(
                arguments.replace('"', '\\"'), input.nick, input.chan, input.conn.nick)
            if code.startswith("<force>"):
                code = code[8:].strip()
                result = unicode(pyexec.eval_py(variables + code, paste_multiline=False))
            else:
                result = unicode(pyexec.eval_py(variables + code))
        else:
            result = unicode(data)

        # factoid postprocessors
        result = text.multiword_replace(result, shortcodes)

        if result.startswith("<act>"):
            result = result[5:].strip()
            action(result)
        elif result.startswith("<url>"):
            url = result[5:].strip()
            try:
                message(http.get(url))
            except http.HttpError:
                message("Could not fetch URL.")
        else:
            if "\n" in result:
                result = result.strip("\r").split("\n")
                for output in result:
                    if prefix_on:
                        message(u"\x02[{}]:\x02 {}".format(factoid_id, output))
                    else:
                        message(output)
            else:
                if prefix_on:
                    message(u"\x02[{}]:\x02 {}".format(factoid_id, result))
                else:
                    message(result)

def mcstatus(inp):
    """mcstatus -- Checks the status of various Mojang (the creators of Minecraft) servers."""

    try:
        request = http.get("http://status.mojang.com/check")
    except (http.URLError, http.HTTPError) as e:
        return "Unable to get Minecraft server status: {}".format(e)

    # let's just reformat this data to get it in a nice format
    data = json.loads(request.replace("}", "").replace("{", "").replace("]", "}").replace("[", "{"))

    out = ""

    # use a loop so we don't have to update it if they add more servers
    yes = []
    no = []
    for server, status in data.items():
        if status == "green":
            yes.append(server)
        else:
            no.append(server)
    if yes:
        out = "\x033\x02Online\x02\x0f: " + ", ".join(yes)
        if no:
            out += " "
    if no:
        out += "\x034\x02Offline\x02\x0f: " + ", ".join(no)

    return "\x0f" + out.replace(".mojang.com", ".mj") \
                       .replace(".minecraft.net", ".mc")

def scrape_pastebin(url):
    id = re.search(r'http://(?:www\.)?pastebin.com/([a-zA-Z0-9]+)$', url).group(1)
    rawurl = "http://pastebin.com/raw.php?i=" + id
    text = http.get(rawurl)
    return text

def oblique(inp, nick='', chan=''):
    '.o/.oblique <command> <args> -- runs <command> using oblique web'
    ' services. see http://wiki.github.com/nslater/oblique/'

    update_commands()

    if ' ' in inp:
        command, args = inp.split(None, 1)
    else:
        command = inp
        args = ''

    command = command.lower()

    if command == 'refresh':
        update_commands(True)
        return '%d commands loaded.' % len(commands)
    if command in commands:
        url = commands[command]
        url = url.replace('${nick}', nick)
        url = url.replace('${sender}', chan)
        url = url.replace('${args}', http.quote(args.encode('utf8')))
        try:
            return http.get(url)
        except http.HTTPError, e:
            return "http error %d" % e.code

def mcpaid(inp):
    ".mcpaid <username> -- Checks if <username> has a premium Minecraft account."
    login = http.get("http://www.minecraft.net/haspaid.jsp?user=" + inp)
    if "true" in login:
        return "The account '" + inp + "' is a premium Minecraft account!"
    else:
        return "The account '" + inp + "' is not a premium Minecraft account!"

def suggest(inp, inp_unstripped=''):
    ".suggest [#n] <phrase> -- gets a random/the nth suggested google search"
    inp = inp_unstripped
    m = re.match('^#(\d+) (.+)$', inp)
    if m:
        num, inp = m.groups()
        num = int(num)
        if num > 10:
            return 'can only get first ten suggestions'
    else:
        num = 0

    #page = http.get('http://suggestqueries.google.com/complete/search', output='json', client='hp', q=inp)
    page = http.get('http://suggestqueries.google.com/complete/search', output='toolbar', hl='en', q=inp)
    xml = minidom.parseString(page)
    suggestions = xml.getElementsByTagName("CompleteSuggestion")
    #page_json = page.split('(', 1)[1][:-1]
    #suggestions = json.loads(page_json)[1]
    if not suggestions:
        return 'no suggestions found'
    if num:
        if len(suggestions) + 1 <= num:
            return 'only got %d suggestions' % len(suggestions)
        choice = num - 1
        out = suggestions[choice].childNodes[0].getAttribute('data')
    else:
        choice = random.randint(1, len(suggestions)) - 1
        out = suggestions[choice].childNodes[0].getAttribute('data')
    return '#%d: %s' % (choice + 1, out)

def run(self):
    db = sqlite3.connect(self.dbpath)
    db.execute(
        "create table if not exists scps(number varchar primary key, title varchar)"
    )
    db.text_factory = str
    while True:
        try:
            c = db.cursor()
            c.execute("delete from scps")
            page = http.to_utf8(
                http.get("http://scp-wiki.wikidot.com/scp-series"))
            scp_re = re.compile(
                r'<a href="/scp-(.*)">SCP-\1</a> - (.*?)</li>', re.I)
            scp_list = scp_re.findall(page)
            for (k, v) in scp_list:
                print k, v
                c.execute(
                    u"replace into scps(number, title) values (upper(?), ?)",
                    (k, v))
            db.commit()
            c.close()
        except Exception as e:
            print "ERROR ERROR ERROR, ", e
        sleep(60 * 5)

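# The refresh loop above only writes to the scps table. A hedged sketch of the
# matching read side is shown below; lookup_scp() and its dbpath argument are
# illustrative assumptions rather than part of the original plugin.
import sqlite3


def lookup_scp(dbpath, number):
    db = sqlite3.connect(dbpath)
    try:
        # numbers are stored uppercased by the writer, so normalise the lookup too
        row = db.execute(
            "select title from scps where number = upper(?)", (number,)
        ).fetchone()
    finally:
        db.close()
    return row[0] if row else None
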
def hashtag(inp, say=None, db=None, bot=None, me=None, conn=None, input=None, chan=None, notice=None):
    "<word>? -- Shows what data is associated with <word>."
    disabledhashes = database.get(db, 'channels', 'disabledhashes', 'chan', chan)
    split = inp.group(1).strip().split(" ")
    try:
        if chan[0] != '#':
            pass
        elif split[0].lower() in disabledhashes.lower():
            notice('{} is disabled.'.format(split[0]))
            return
    except TypeError:
        pass

    try:
        prefix_on = bot.config["plugins"]["factoids"].get("prefix", False)
    except KeyError:
        prefix_on = False

    db_init(db)

    # split up the input
    split = inp.group(1).strip().split(" ")
    factoid_id = split[0]

    if len(split) >= 1:
        arguments = " ".join(split[1:])
    else:
        arguments = ""

    data = get_memory(db, factoid_id)

    if data:
        # factoid preprocessors
        if data.startswith("<py>"):
            code = data[4:].strip()
            variables = 'input="""%s"""; nick="%s"; chan="%s"; bot_nick="%s";' % (
                arguments.replace('"', '\\"'), input.nick, input.chan, input.conn.nick)
            result = execute.eval_py(variables + code)
        elif data.startswith("<url>"):
            url = data[5:].strip()
            try:
                result = http.get(url)
            except http.HttpError:
                result = "Could not fetch URL."
        else:
            result = data

        # factoid postprocessors
        result = text.multiword_replace(result, shortcodes)

        if result.startswith("<act>"):
            result = result[5:].strip()
            me(result)
        else:
            if prefix_on:
                say("\x02[%s]:\x02 %s" % (factoid_id, result))
            else:
                say("\x02%s\x02 %s" % (factoid_id, result))

def kernel(reply):
    contents = http.get("https://www.kernel.org/finger_banner")
    contents = re.sub(r'The latest(\s*)', '', contents)
    contents = re.sub(r'version of the Linux kernel is:(\s*)', '- ', contents)
    lines = contents.split("\n")

    message = "Linux kernel versions: {}".format(", ".join(line for line in lines[:-1]))
    reply(message)

def tfw(inp):
    '.tfw [zip|postcode] -- THE F*****G WEATHER'
    src = http.get(tfw_url % inp)
    location = re.search(r_loc, src).group(1)
    temp = re.search(r_tmp, src).group(1)
    desc = re.search(r_dsc, src).group(1).replace("<br />", ". ")
    c = int((int(temp) - 32) * (5.0 / 9.0))
    return "%s. %sF/%sC. %s" % (location, temp, c, desc)

def cdecl(inp):
    '''.cdecl <expr> -- translate between C declarations and English, using cdecl.org'''
    query_url = get_cdecl_query_url()
    if not query_url:
        return "cannot find CDECL query url"
    return http.get(query_url, q=inp)

def scrape_mibpaste(url):
    pagesource = http.get(url)
    rawpaste = re.search(r'(?s)(?<=<body>\n).+(?=<hr>)', pagesource).group(0)
    filterbr = rawpaste.replace("<br />", "")
    unescaped = decode_html(filterbr)
    stripped = unescaped.strip()
    return stripped

def rating(inp):
    if "scp-" not in inp:
        inp = "scp-" + inp
    print("Calling http.get() on http://www.scp-wiki.net/%s" % inp)
    page = http.get("http://www.scp-wiki.net/%s" % inp)
    rating = http.get_html(
        "http://www.scp-wiki.net/%s" % inp).xpath("//*[@class='number prw54353']/text()")[0]
    return rating

def wordpress(inp, say=None):
    "wordpress/wp <name> -- checks if the WordPress exists"
    domain = inp.split(" ")[0]
    url = "http://en.wordpress.com/typo/?" + urllib.urlencode({"subdomain": domain})
    try:
        data = http.get(url)
    except http.HTTPError, e:
        return (str(e))

def randomquote(inp, input=None, bot=None):
    help = ",randomquote <person> -- gets random quotes. Currently allowed persons Cave Johnson, GLaDOS, Wheatley, Gir, Zim, Yoda, Sheldon Cooper"
    if inp == "":
        return(help)
    check = []
    check = inp.lower().split(" ")
    if check[0] == "cave":
        h = http.get('http://www.imdb.com/character/ch0242805/quotes').replace("\n", "").replace(" ", "").replace(" ", "").replace(" ", "")
        return("Cave Johnson: " + multiwordReplace(random.choice(regexmatch2(h, re.compile(cave))), wordDic))
    if check[0] == "glados":
        h = http.get('http://www.imdb.com/character/ch0069595/quotes').replace("\n", "").replace(" ", "").replace(" ", "").replace(" ", "")
        return("GLaDOS: " + multiwordReplace(random.choice(regexmatch2(h, re.compile(glados))), wordDic))
    if check[0] == "wheatley":
        h = http.get('http://www.imdb.com/character/ch0242806/quotes').replace("\n", "").replace(" ", "").replace(" ", "").replace(" ", "")
        return("Wheatley: " + multiwordReplace(random.choice(regexmatch2(h, re.compile(wheatly))), wordDic))
    if check[0] == "gir":
        h = http.get('http://www.imdb.com/character/ch0131917/quotes').replace("\n", "").replace(" ", "").replace(" ", "").replace(" ", "")
        return("Gir: " + multiwordReplace(random.choice(regexmatch2(h, re.compile(gir))).encode("utf8"), wordDic))
    if check[0] == "zim":
        h = http.get('http://www.imdb.com/character/ch0088128/quotes').replace("\n", "").replace(" ", "").replace(" ", "").replace(" ", "")
        return("Zim: " + multiwordReplace(random.choice(regexmatch2(h, re.compile(zim))).encode("utf8"), wordDic))
    if check[0] == "yoda":
        h = http.get('http://www.imdb.com/character/ch0000015/quotes').replace("\n", "").replace(" ", "").replace(" ", "").replace(" ", "")
        return("Yoda: " + multiwordReplace(random.choice(regexmatch2(h, re.compile(yoda))).encode("utf8"), wordDic))
    if check[0] == "sheldon":
        h = http.get('http://www.imdb.com/character/ch0064640/quotes').replace("\n", "").replace(" ", "").replace(" ", "").replace(" ", "")
        return("Sheldon Cooper: " + multiwordReplace(random.choice(regexmatch2(h, re.compile(sheldon))).encode("utf8"), wordDic))

def tld(inp):
    "tld <tld> -- returns info about the tld"
    if inp.startswith("."):
        _tld = inp[1:]
    else:
        _tld = inp
    if "." in _tld:
        _tld = _tld.split(".")[-1]
    try:
        data = http.get("http://www.iana.org/domains/root/db/%s.html" % _tld.encode("utf8", "ignore"))
    except http.HTTPError, e:
        if "404:" in str(e):
            try:
                data = http.get("https://en.wikipedia.org/wiki/.%s" % _tld.encode("utf8", "ignore"))
            except http.HTTPError, e:
                return "No match for %s" % _tld
    search = re.search("""<th scope="row" style="text-align:left;">Sponsor<\/th>\n<td><a href="(.*)" title="(.*)">(.*)<\/a><\/td>""", data)
    if search:
        return "TLD: %s - Sponsor: %s" % (_tld, search.group(3))
    else:
        return "No match for %s" % _tld

def hos(inp, reply=None):
    """hos <user> - This command will lookup our Hall of Shame"""
    if inp == "":
        return '\x038You need to pass a username! (example: \"!hos frdmn\")'
    else:
        entries = http.get("http://yeahwh.at/test/api/hos.php?user=" + inp).split("<br>")
        for entry in entries:
            reply(entry)

def pomf(inp, say=None):
    "pomf <search> -- search pomf.se"
    if inp:
        url = "http://moe.pomf.se/includes/api.php?" + urllib.urlencode({"do": "search", "q": inp})
        try:
            data = http.get(url)
        except http.HTTPError, e:
            say(str(e) + ": " + url)
            return

def get_steam_info(url):
    page = http.get(url)
    soup = BeautifulSoup(page, 'lxml', from_encoding="utf-8")

    data = {
        "name": soup.find('div', {'class': 'apphub_AppName'}).text,
        "desc": truncate_str(soup.find('meta', {'name': 'description'})['content'].strip(), 80)
    }

    # get the element details_block
    details = soup.find('div', {'class': 'details_block'})

    # loop over every <b></b> tag in details_block
    for b in details.findAll('b'):
        # get the contents of the <b></b> tag, which is our title
        title = b.text.lower().replace(":", "")
        if title == "languages":
            # we have all we need!
            break

        # find the next element directly after the <b></b> tag
        next_element = b.nextSibling
        if next_element:
            # if the element is some text
            if isinstance(next_element, NavigableString):
                text = next_element.string.strip()
                if text:
                    # we found valid text, save it and continue the loop
                    data[title] = text
                    continue
                else:
                    # the text is blank - sometimes this means there are
                    # useless spaces or tabs between the <b> and <a> tags.
                    # so we find the next <a> tag and carry on to the next
                    # bit of code below
                    next_element = next_element.find_next('a', href=True)

            # if the element is an <a></a> tag
            if isinstance(next_element, Tag) and next_element.name == 'a':
                text = next_element.string.strip()
                if text:
                    # we found valid text (in the <a></a> tag),
                    # save it and continue the loop
                    data[title] = text
                    continue

    data["price"] = soup.find('div', {'class': 'game_purchase_price price'}).text.strip()

    return "\x02{name}\x02: {desc}, \x02Genre\x02: {genre}, \x02Release Date\x02: {release date}," \
           " \x02Price\x02: {price}".format(**data)

def parse(match):
    url = urlnorm.normalize(match.encode('utf-8'))
    if url not in ignored_urls:
        url = url.decode('utf-8')
        try:
            soup = BeautifulSoup.BeautifulSoup(http.get(url))
            return soup.title.string
        except:
            return "fail"

def kernel(reply):
    contents = http.get("https://www.kernel.org/finger_banner")
    contents = re.sub(r'The latest(\s*)', '', contents)
    contents = re.sub(r'version of the Linux kernel is:(\s*)', '- ', contents)
    lines = contents.split("\n")

    message = "Linux kernel versions: {}".format(", ".join(
        line for line in lines[:-1]))
    reply(message)

def haspaid(inp, nick=None):
    if inp == '':
        inp = nick
    thing = " ".join([x for x in http.get("http://www.minecraft.net/haspaid.jsp", user=inp).splitlines() if x])
    if thing == 'true':
        return "%s has bought minecraft." % (inp)
    else:
        return "%s has not bought minecraft." % (inp)

def kernel(inp, reply=None):
    """.kernel - Displays recent kernel versions."""
    contents = http.get("https://www.kernel.org/finger_banner")
    contents = re.sub(r'The latest(\s*)', '', contents)
    contents = re.sub(r'version of the Linux kernel is:(\s*)', '- ', contents)
    lines = contents.split("\n")

    message = "Linux kernel versions: "
    message += ", ".join(line for line in lines[:-1])
    reply(message)

def get_cdecl_query_url():
    print("RUNNING")
    result = http.get(CDECL_URL)
    match = RE_QUERY_ENDPOINT.search(result)
    if not match:
        return None
    return match.group(1)

def scrape_mibpaste(url):
    if not url.startswith("http"):
        url = "http://" + url
    pagesource = http.get(url)
    rawpaste = re.search(r'(?s)(?<=<body>\n).+(?=<hr>)', pagesource).group(0)
    filterbr = rawpaste.replace("<br />", "")
    unescaped = decode_html(filterbr)
    stripped = unescaped.strip()
    return stripped

def refresh_cache():
    """Gets a page of random FMLs and puts them into a dictionary"""
    page = http.get(urljoin(base_url, 'random'))
    soup = BeautifulSoup(page)

    for e in soup.findAll('div', {'class': 'post article'}):
        id = int(e['id'])
        text = ''.join(e.find('p').findAll(text=True))
        text = http.unescape(text)
        fml_cache[id] = text

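# refresh_cache() above fills the module-level fml_cache dict keyed by FML id.
# A hedged get-or-refresh helper built on top of it might look like this;
# random_cached_fml() is an illustrative name, not part of the original plugin.
import random


def random_cached_fml():
    if not fml_cache:
        refresh_cache()
    fml_id, text = random.choice(list(fml_cache.items()))
    return '(#%d) %s' % (fml_id, text)
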
def down(inp):
    '''.down <url> -- checks to see if the website is down'''
    # urlparse follows the RFC closely, so we have to check for schema existence and prepend an empty schema if necessary
    if not inp.startswith('//') and '://' not in inp:
        inp = '//' + inp
    urlp = urllib.parse.urlparse(str(inp), 'http')
    if urlp.scheme not in ('http', 'https'):
        return inp + " is not a valid HTTP URL"

    inp = "%s://%s" % (urlp.scheme, urlp.netloc)
    # http://mail.python.org/pipermail/python-list/2006-December/589854.html
    try:
        http.get(inp, get_method='HEAD')
        return inp + ' seems to be up'
    except http.URLError as error:
        return inp + ' seems to be down. Error: %s' % error.reason

def isup(inp):
    "isup <url> -- checks if the website is up or not"
    data = http.get("http://isup.me/%s" % inp)
    if "If you can see this page and still think we're down, it's just you." in data:
        return "If you can see this text and still think we're down, it's just you."
    elif "It's just you." in data:
        return "It's just you."
    else:
        return "It's not just you"

def down(inp):
    """.down <url> -- checks to see if the website is down"""
    # urlparse follows the RFC closely, so we have to check for schema existence and prepend an empty schema if necessary
    if not inp.startswith("//") and "://" not in inp:
        inp = "//" + inp
    urlp = urllib.parse.urlparse(str(inp), "http")
    if urlp.scheme not in ("http", "https"):
        return inp + " is not a valid HTTP URL"

    inp = "%s://%s" % (urlp.scheme, urlp.netloc)
    # http://mail.python.org/pipermail/python-list/2006-December/589854.html
    try:
        http.get(inp, get_method="HEAD")
        return inp + " seems to be up"
    except http.URLError as error:
        return inp + " seems to be down. Error: %s" % error.reason

def ghissues(text):
    """ghissues username/repo [number] - Get specified issue summary, or open issue count"""
    args = text.split()
    if args[0] in shortcuts:
        repo = shortcuts[args[0]]
    else:
        repo = args[0]
    url = "https://api.github.com/repos/{}/issues".format(repo)

    specific_issue = len(args) > 1
    if specific_issue:
        url += "/{}".format(args[1])

    print("Fetching {}".format(url))
    try:
        raw_data = http.get(url)
    except urllib.error.HTTPError:
        if specific_issue:
            return "Error getting issues for '{}/{}', is it a valid issue?".format(
                args[0], args[1])
        else:
            return "Error getting issues for '{}', is it a valid repository?".format(
                args[0])

    issue_list = json.loads(raw_data)
    if not specific_issue:
        if len(issue_list) < 1:
            return "Repository has no open issues"
        issue = issue_list[0]
    else:
        issue = issue_list  # only had one issue

    issue_number = issue["number"]
    if issue["state"] == "open":
        state = "\x033\x02OPEN\x02\x0f"
    else:
        state = "\x034\x02CLOSED\x02\x0f by {}".format(issue["closed_by"]["login"])
    user = issue["user"]["login"]
    title = issue["title"]
    summary = truncate(issue["body"])

    try:
        shorturl = try_shorten_gitio(issue["html_url"])
    except urllib.error.HTTPError:
        # issue_number is an int, so convert it before concatenating
        shorturl = try_shorten_gitio(issue["html_url"] + " " + repo.split("/")[1] + str(issue_number))

    if summary:
        return format_with_summary.format(issue_number, state, user, title, summary, shorturl)
    else:
        return format_without_summary.format(issue_number, state, user, title, shorturl)
