def autotitle(self, input): """Automatically shows the title for specified sites""" if hasattr(self.bot,"pluginstorage_at"): self.storage = self.bot.pluginstorage_at else: self.say("Patterns not loaded, hopefully this should never happen") matches = re.findall(r"(https?://[^ ]+|www\.[^ ]+)", input.args, re.I) if not matches: return for m in matches: url = m.encode('utf-8') if not url.startswith("http"): url = "http://" + url for p in self.storage["autotitle"]: if re.search(p, url, re.I): try: page = tounicode(urllib2.urlopen(url).read()) title = re.search('<title>(.*?)</title>', page, re.I | re.MULTILINE | re.DOTALL) if not title: self.say("Page has no title tag!") return title = decodehtml(title.group(1).replace("\n","")).strip() title = re.sub(r"\s+", " ", title) self.say("\x02Title:\x02 %s" % title) except urllib2.URLError, e: self.say('Error: Invalid url.')
def title(self, input): """Fetches the contents of the <title> tag of a web page""" url = input.args.strip() if not url: try: url = self.lasturl[input.sender.lower()] except KeyError: self.reply("No URLs posted previously and none given, nothing I can do.") return m = re.search(r"^https?://", url, re.I) if not m: url = "http://" + url self.lasturl[input.sender.lower()] = url try: page = tounicode(urllib2.urlopen(url).read()) title = re.search('<title>(.*?)</title>', page, re.I | re.MULTILINE | re.DOTALL) if not title: self.say("Page has no title tag!") return self.say("\x02Title:\x02 %s" % decodehtml(title.group(1).replace("\n",""))) except urllib2.URLError, e: self.say('Error: Invalid url.')
def checksites(self, pattern=None):
    """Poll every stored RSS site whose URL contains *pattern* and announce
    new entries to the site's channel.

    Runs off the reactor thread, so every IRC message is sent via
    reactor.callFromThread.  With pattern=None all sites are checked
    silently (only new entries are announced); with a pattern, progress
    and "no new entries" feedback is emitted too.
    """
    for site in self.storage['sites']:
        # (pattern or ""): an empty/None pattern matches every site URL.
        if (pattern or "") in site.url:
            try:
                if pattern:
                    reactor.callFromThread(self.msg, site.chan, "Checking %s..." % site.url)
                res = site.check()  # presumably returns a list of new feed entries — TODO confirm
                if res:
                    if pattern:
                        reactor.callFromThread(self.msg, site.chan, "Found %d new entries:" % len(res))
                    # Announce oldest entries first.
                    res.reverse()
                    for entry in res:
                        reactor.callFromThread(self.msg, site.chan, "[RSS] \x02%s\x02 - \x1f%s" % (decodehtml(entry.get('title', '')), entry.get('link', '')))
                        msg = entry.get('description', '')
                        # Turn <br> into newlines before stripping the remaining HTML.
                        msg = re.sub("<br\s?/?>", "\n", msg)
                        msg = decodehtml(removehtml(msg))
                        if site.limit:
                            # Cap the description at site.limit lines.
                            msg = "\n".join(msg.split("\n")[:site.limit])
                        reactor.callFromThread(self.msg, site.chan, msg)
                else:
                    if pattern:
                        reactor.callFromThread(self.msg, site.chan, "No new entries found.")
            # Broad catch is deliberate: one broken feed must not kill the
            # polling loop for the remaining sites.
            except Exception, e:
                reactor.callFromThread(self.msg, site.chan, "\x02RSS:\x02 Error while checking %s. (%s)!" % (site.url, e))
def spotify(self, input):
    """Automatically catches Spotify URLs and retrieves track info.

    Announces track info for the first open.spotify.com track URL in the
    message, or album info for the first album URL when no track is found.
    """
    # Dots are escaped so the patterns match only the real
    # open.spotify.com host (unescaped `.` matched any character).
    track = re.search(r"(http://open\.spotify\.com/track/[^\s]+)", input.args, re.I)
    if track:
        self.say(decodehtml(spotifytrack(track.group(1))))
        return
    album = re.search(r"(http://open\.spotify\.com/album/[^\s]+)", input.args, re.I)
    if album:
        self.say(decodehtml(spotifyalbum(album.group(1))))
        return
    # TODO: playlists?  (Removed the dead `playlist = None; if playlist:`
    # placeholder that could never execute.)
def google(self, input):
    """Perform a web search using the Google search engine.

    Accepts -d/-r/--results N (clamped to 1..10) to control how many
    results are displayed.
    """
    args = input.args or ""
    parser = self.OptionParser()
    parser.add_option("-d", "-r", "--results", dest="results", default=1, type="int")
    (options, args) = parser.parse_args(args.split())
    if not args:
        raise self.BadInputError()
    query = " ".join(args).encode('utf-8')
    # Clamp the requested result count to the range the API can return.
    if options.results < 1:
        options.results = 1
    elif options.results > 10:
        options.results = 10
    try:
        data = urllib.urlopen('http://www.google.com/uds/GwebSearch?callback=GwebSearch.RawCompletion&context=0&lstkp=0&hl=en&key=ABQIAAAAeBvxXUmueP_8_kTINo0H4hSKL4HoBFFxfS_vfvgFpLqAt5GPWRTHDAESci2RYvZRkcpsYXapXjZWKA&v=1.0&rsz=large&q=%s' % urllib.quote(query)).read()
    except IOError:
        self.say("Error: Unable to establish a connection to google.com")
        return
    data = unescapeuni(data)
    data = decodehtml(data)
    # Default so the "[%s matches]" output below never hits an unbound
    # name when the count is missing from the response (was a NameError).
    matches = "unknown"
    m = re.search('estimatedResultCount":"([^"]+)"', data)
    if m:
        matches = m.group(1)
    m = re.findall(r'"url":"([^"]*)".*?"titleNoFormatting":"([^"]*)","content":"([^"]*)"', data, re.IGNORECASE)
    if m:
        if len(m) < options.results:
            options.results = len(m)
        if options.results == 1:
            self.say('\x02%s\x02 - ( \x1f%s\x1f ) [%s matches]' % (removehtml(m[0][1]), urllib.unquote(m[0][0]), matches))
            self.say(removehtml(m[0][2]))
        else:
            self.say('Showing the first \x02%s\x02 of \x02%s\x02 matches' % (options.results, matches))
            for x in range(options.results):
                self.say('\x02%s\x02 - ( \x1f%s\x1f )' % (removehtml(m[x][1]), urllib.unquote(m[x][0])))
    else:
        # Was `phenny.say(...)` — a leftover from the phenny framework that
        # raised NameError whenever a search returned no results.
        self.say('Your search for \x02%s\x02 did not return any results.' % input.args)
def unnecessaryknowledge(self, input):
    """Get some unnecessary knowledge from unnecessaryknowledge.com"""
    if not input.args:
        raise self.BadInputError()
    try:
        data = urllib.urlopen('http://www.unnecessaryknowledge.com/_default.asp').read()
    except IOError:
        self.say("Error: Unable to establish a connection to unnecessaryknowledge.com.")
        return
    # Flatten the page so the <h2> regex can match across line breaks.
    data = data.replace('\r', '').replace('\n', '')
    m = re.search(r"<h2[^>]+?>(?P<text>.+?)</h2>", data, re.IGNORECASE)
    if not m:
        self.say("Error: Unable to parse data.")
        return
    msg = m.group("text")
    # Replace link tags with the IRC bold control char.  The result must be
    # assigned — the original discarded re.sub's return value, so the
    # substitution silently never happened.
    msg = re.sub(r"(?:<a href[^>]*>|</a>)", '\x02', msg)
    msg = decodehtml(msg).strip()
    self.say(msg)
def whatis(self, input):
    """Performs a "what is <argument>" query to Google and displays the result"""
    if not input.args:
        raise self.BadInputError()
    query = input.args.strip()
    # Leading "-u " asks us to echo the query URL back as well.
    showurl = query.startswith("-u ")
    if showurl:
        query = query[3:]
    query = ("what is " + query).encode('utf-8')
    url = qurl % urllib.quote(query)
    if showurl:
        self.say(chr(2) + "URL: " + chr(2) + url)
    # Pretend to be a browser; Google serves different markup otherwise.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor)
    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.2) Gecko/2008091620 Firefox/3.0.2', 'Connection':'Keep-Alive', 'Content-Type':'application/x-www-form-urlencoded'}
    page = opener.open(urllib2.Request(url, None, headers)).read()
    # Try the answer-box patterns in order; announce the first hit.
    for pattern in (r"<h[23]\sclass=r[^>]*><b>(.+?)</b></h[23]>", r"onebox/[^>]*>(.*?)<(?:/table|br)"):
        hit = re.search(pattern, page, re.IGNORECASE)
        if hit:
            self.say(decodehtml(removehtml(hit.group(1).strip())))
            return
    self.say("Dunno :S")
def sl(self, input):
    """Queries sl.se for train/bus times.

    Two forms of input are accepted (parsed by one combined regex):
      * "senare" / "tidigare" — page forward/backward through the results
        of the previous query, using POST state cached on self.bot.
      * "<from>, <to>[, <time>|<date time>]" — run a new journey query.

    Scrapes reseplanerare.sl.se and announces header, legs, arrival and
    total travel time as separate messages.
    """
    cmd = input.args
    # later: "sen(are)"/"tidig(are)"; start/stop: comma-separated stops;
    # optional trailing "YYYY-MM-DD HH:MM" (date) or bare "HH:MM" (time).
    m = re.search(r'(?P<later>sen|tidig)(?:are)?|(?P<start>[^,]+),\s*(?P<stop>[^,]+)(?:,\s*?(?:(?P<date>\d{4}-\d{2}-\d{2} [012]?[0-9][.:]?[0-5][0-9])|(?P<time>[01-2]?[0-9][.:]?[0-5][0-9])))?', cmd, re.I)
    if not m:
        raise self.BadInputError()
        return  # NOTE(review): unreachable after the raise above
    # %s slots: from-stop, to-stop, time, search-direction.  The literal
    # %3D survives until urllib.unquote() below turns it into '='.
    baseurl = """http://reseplanerare.sl.se/bin/query.exe/sn?REQ0JourneyStopsS0A=255&S=%s&REQ0JourneyStopsZ0A=255&Z=%s&start=yes&REQ0JourneyTime%3D%s&REQ0HafasSearchForw=%s"""
    nick = input.nick
    if m.group("later"):
        # Paging request: replay the cached form POST with the
        # earlier/later button name captured from the previous result page.
        if not hasattr(self.bot, "sl_posttarget"):
            self.say("Sorry, didn't work!")
            return
        if "sen" in m.group("later"):
            earlat = {self.bot.sl_later:"Åk senare"}
        else:
            earlat = {self.bot.sl_earlier:"Åk tidigare"}
        earlat = urllib.urlencode(earlat)
        req = urllib2.Request(self.bot.sl_posttarget, earlat)
        data = urllib2.urlopen(req).read()
        data = decodehtml(data)
    if m.group("start"):
        # Fresh query.
        start = m.group("start")
        stop = m.group("stop")
        tid = None
        date = None
        if m.group("time"):
            tid = m.group("time")
        if m.group("date"):
            # "YYYY-MM-DD HH:MM" -> date part and time part.
            date = m.group("date")[11:]
            tid = m.group("date")[0:10]
        if tid:
            tpar = 0
        else:
            # No time given: search forward from "now" (HH:MM slice of the
            # local timestamp string).
            tpar = 1
            tid = str(self.localtime())[11:16]
        datestring = ""
        if date:
            # sl.se wants D.M.YY rather than ISO format.
            then = date.split("-")
            then = datetime.date(int(then[0]),int(then[1]),int(then[2]))
            datestring = str(then.day) + "." + str(then.month) + "." + str(then.year)[-2:]
        baseurl = baseurl.encode('utf-8')
        baseurl = urllib.unquote(baseurl)
        # Site expects latin-1 encoded stop names.
        start = start.encode('latin_1')
        stop = stop.encode('latin_1')
        queryurl = baseurl % (urllib.quote(start),urllib.quote(stop),tid,tpar)
        if date:
            queryurl += "&REQ0JourneyDate=" + datestring
        req = urllib2.Request(queryurl)
        # presumably "&nbsp;" replacement — the two arguments look identical
        # here but the first may be a non-breaking space; TODO confirm.
        data = urllib2.urlopen(req).read().replace(" "," ")
        data = decodehtml(data)
        # If the site answered with a disambiguation page ("Vilken ...?")
        # for either field, pick its first suggestion and requery.
        recheck = False
        if re.search(r'<label for="from" class="ErrorText">Vilken', data, re.IGNORECASE):
            recheck = True
            match = re.search(r'<option value="S-0N1">([^[]+)\[', data, re.IGNORECASE)
            if match:
                start = match.group(1).strip()
            else:
                self.say("error1, i sorry")
        if re.search(r'<label for="to" class="ErrorText">Vilken', data, re.IGNORECASE):
            recheck = True
            match = re.search(r'<option value="S-1N1">([^[]+)\[', data, re.IGNORECASE)
            if match:
                stop = match.group(1).strip()
            else:
                self.say("error2 i sorry")
        if recheck:
            # NOTE(review): this date append is immediately discarded by the
            # rebuild on the next line — the two statements look swapped, so
            # the recheck query likely loses the date parameter.
            if date:
                queryurl += "&REQ0JourneyDate=" + datestring
            queryurl = baseurl % (urllib.quote(start),urllib.quote(stop),tid,tpar)
            #req = urllib2.Request(queryurl)
            data = urllib.urlopen(queryurl).read().replace(" "," ")
            data = decodehtml(data)
    # Cache the earlier/later form-button names and the form action URL on
    # the bot so a later "senare"/"tidigare" command can page the results.
    m = re.search(r'tidigare resor."\s*name="(?P<earlier>[^"]+)"', data, re.I | re.DOTALL)
    if m:
        self.bot.sl_earlier = m.group("earlier")
    m = re.search(r'senare resor."\s*name="(?P<later>[^"]+)"', data, re.I | re.DOTALL)
    if m:
        self.bot.sl_later = m.group("later")
    m = re.search(r'tp_results_form"\s*action="(?P<posttarget>[^"]+)"', data, re.I | re.DOTALL)
    if m:
        self.bot.sl_posttarget = m.group("posttarget")
    # Parse the result page: group 1 is the heading, group 2 the itinerary.
    match = re.search(r'<div class="FormAreaLight">.+<h3>([^<]+)</h3>.*-bottom:..?px;">.+?<p>(.*)</p><p>' ,data, re.DOTALL | re.IGNORECASE)
    if match:
        head = match.group(1)
        body = match.group(2)
    else:
        head = body = None
        self.say("machine no work")
        return
    # Short tags (<b>, <p>, <br/>...) become spaces, longer tags become the
    # IRC bold control char (chr(2)).
    body = re.sub("</?[a-z]{1,2} ?/?>"," ",body)
    body = re.sub("</?[a-z]{3,10}>",chr(2),body)
    # Split the itinerary at the fixed Swedish phrases: "Restid" (travel
    # time) footer and "Du är framme" (you have arrived).
    foot = body[body.index("Restid"):]
    body = body[:body.index("Restid")].replace(" "," ")
    b2 = body[body.index("Du är framme"):]
    b1 = body[:body.index("Du är framme")]
    self.say("\x02%s\x02" % head)  # "from X to Y on <date>"
    self.say(b1)  # "take ... from ..."
    self.say(b2)  # "you arrive at ..."
    self.say(foot)  # "travel time NN minutes"
def cleanup(s):
    """Reduce an HTML fragment to plain IRC text.

    <br> tags become newlines, <b>/</b> become the IRC bold control
    character, HTML entities are decoded, and every remaining tag is
    stripped.
    """
    text = re.sub(r'\<br ?\/?\>', chr(10), s)
    text = re.sub(r'</?b>', chr(2), text)
    return re.sub(r'<[^>]+>', '', decodehtml(text))