def autotitle(self, input): """Automatically shows the title for specified sites""" if hasattr(self.bot,"pluginstorage_at"): self.storage = self.bot.pluginstorage_at else: self.say("Patterns not loaded, hopefully this should never happen") matches = re.findall(r"(https?://[^ ]+|www\.[^ ]+)", input.args, re.I) if not matches: return for m in matches: url = m.encode('utf-8') if not url.startswith("http"): url = "http://" + url for p in self.storage["autotitle"]: if re.search(p, url, re.I): try: page = tounicode(urllib2.urlopen(url).read()) title = re.search('<title>(.*?)</title>', page, re.I | re.MULTILINE | re.DOTALL) if not title: self.say("Page has no title tag!") return title = decodehtml(title.group(1).replace("\n","")).strip() title = re.sub(r"\s+", " ", title) self.say("\x02Title:\x02 %s" % title) except urllib2.URLError, e: self.say('Error: Invalid url.')
def title(self, input): """Fetches the contents of the <title> tag of a web page""" url = input.args.strip() if not url: try: url = self.lasturl[input.sender.lower()] except KeyError: self.reply("No URLs posted previously and none given, nothing I can do.") return m = re.search(r"^https?://", url, re.I) if not m: url = "http://" + url self.lasturl[input.sender.lower()] = url try: page = tounicode(urllib2.urlopen(url).read()) title = re.search('<title>(.*?)</title>', page, re.I | re.MULTILINE | re.DOTALL) if not title: self.say("Page has no title tag!") return self.say("\x02Title:\x02 %s" % decodehtml(title.group(1).replace("\n",""))) except urllib2.URLError, e: self.say('Error: Invalid url.')
def checksites(self, pattern=None):
    """Poll every stored RSS site whose URL contains *pattern* and announce
    new entries to the site's channel.

    Runs off the reactor thread, so every IRC message is sent via
    reactor.callFromThread.  With pattern=None all sites are checked
    silently (only new entries are announced); with a pattern, progress
    and "no new entries" feedback is emitted too.
    """
    for site in self.storage['sites']:
        # (pattern or ""): an empty/None pattern matches every site URL.
        if (pattern or "") in site.url:
            try:
                if pattern:
                    reactor.callFromThread(self.msg, site.chan, "Checking %s..." % site.url)
                res = site.check()  # presumably returns a list of new feed entries — TODO confirm
                if res:
                    if pattern:
                        reactor.callFromThread(self.msg, site.chan, "Found %d new entries:" % len(res))
                    # Announce oldest entries first.
                    res.reverse()
                    for entry in res:
                        reactor.callFromThread(self.msg, site.chan, "[RSS] \x02%s\x02 - \x1f%s" % (decodehtml(entry.get('title', '')), entry.get('link', '')))
                        msg = entry.get('description', '')
                        # Turn <br> into newlines before stripping the remaining HTML.
                        msg = re.sub("<br\s?/?>", "\n", msg)
                        msg = decodehtml(removehtml(msg))
                        if site.limit:
                            # Cap the description at site.limit lines.
                            msg = "\n".join(msg.split("\n")[:site.limit])
                        reactor.callFromThread(self.msg, site.chan, msg)
                else:
                    if pattern:
                        reactor.callFromThread(self.msg, site.chan, "No new entries found.")
            # Broad catch is deliberate: one broken feed must not kill the
            # polling loop for the remaining sites.
            except Exception, e:
                reactor.callFromThread(self.msg, site.chan, "\x02RSS:\x02 Error while checking %s. (%s)!" % (site.url, e))
def spotify(self, input):
    """Automatically catches Spotify URLs and retrieves track info.

    Announces track info for the first open.spotify.com track URL in the
    message, or album info for the first album URL when no track is found.
    """
    # Dots are escaped so the patterns match only the real
    # open.spotify.com host (unescaped `.` matched any character).
    track = re.search(r"(http://open\.spotify\.com/track/[^\s]+)", input.args, re.I)
    if track:
        self.say(decodehtml(spotifytrack(track.group(1))))
        return
    album = re.search(r"(http://open\.spotify\.com/album/[^\s]+)", input.args, re.I)
    if album:
        self.say(decodehtml(spotifyalbum(album.group(1))))
        return
    # TODO: playlists?  (Removed the dead `playlist = None; if playlist:`
    # placeholder that could never execute.)
def google(self, input):
    """Perform a web search using the Google search engine.

    Accepts -d/-r/--results N (clamped to 1..10) to control how many
    results are displayed.
    """
    args = input.args or ""
    parser = self.OptionParser()
    parser.add_option("-d", "-r", "--results", dest="results", default=1, type="int")
    (options, args) = parser.parse_args(args.split())
    if not args:
        raise self.BadInputError()
    query = " ".join(args).encode('utf-8')
    # Clamp the requested result count to the range the API can return.
    if options.results < 1:
        options.results = 1
    elif options.results > 10:
        options.results = 10
    try:
        data = urllib.urlopen('http://www.google.com/uds/GwebSearch?callback=GwebSearch.RawCompletion&context=0&lstkp=0&hl=en&key=ABQIAAAAeBvxXUmueP_8_kTINo0H4hSKL4HoBFFxfS_vfvgFpLqAt5GPWRTHDAESci2RYvZRkcpsYXapXjZWKA&v=1.0&rsz=large&q=%s' % urllib.quote(query)).read()
    except IOError:
        self.say("Error: Unable to establish a connection to google.com")
        return
    data = unescapeuni(data)
    data = decodehtml(data)
    # Default so the "[%s matches]" output below never hits an unbound
    # name when the count is missing from the response (was a NameError).
    matches = "unknown"
    m = re.search('estimatedResultCount":"([^"]+)"', data)
    if m:
        matches = m.group(1)
    m = re.findall(r'"url":"([^"]*)".*?"titleNoFormatting":"([^"]*)","content":"([^"]*)"', data, re.IGNORECASE)
    if m:
        if len(m) < options.results:
            options.results = len(m)
        if options.results == 1:
            self.say('\x02%s\x02 - ( \x1f%s\x1f ) [%s matches]' % (removehtml(m[0][1]), urllib.unquote(m[0][0]), matches))
            self.say(removehtml(m[0][2]))
        else:
            self.say('Showing the first \x02%s\x02 of \x02%s\x02 matches' % (options.results, matches))
            for x in range(options.results):
                self.say('\x02%s\x02 - ( \x1f%s\x1f )' % (removehtml(m[x][1]), urllib.unquote(m[x][0])))
    else:
        # Was `phenny.say(...)` — a leftover from the phenny framework that
        # raised NameError whenever a search returned no results.
        self.say('Your search for \x02%s\x02 did not return any results.' % input.args)
def unnecessaryknowledge(self, input):
    """Get some unnecessary knowledge from unnecessaryknowledge.com"""
    if not input.args:
        raise self.BadInputError()
    try:
        data = urllib.urlopen('http://www.unnecessaryknowledge.com/_default.asp').read()
    except IOError:
        self.say("Error: Unable to establish a connection to unnecessaryknowledge.com.")
        return
    # Flatten the page so the <h2> regex can match across line breaks.
    data = data.replace('\r', '').replace('\n', '')
    m = re.search(r"<h2[^>]+?>(?P<text>.+?)</h2>", data, re.IGNORECASE)
    if not m:
        self.say("Error: Unable to parse data.")
        return
    msg = m.group("text")
    # Replace link tags with the IRC bold control char.  The result must be
    # assigned — the original discarded re.sub's return value, so the
    # substitution silently never happened.
    msg = re.sub(r"(?:<a href[^>]*>|</a>)", '\x02', msg)
    msg = decodehtml(msg).strip()
    self.say(msg)
def whatis(self, input):
    """Performs a "what is <argument>" query to Google and displays the result"""
    if not input.args:
        raise self.BadInputError()
    query = input.args.strip()
    # Leading "-u " asks us to echo the query URL back as well.
    showurl = query.startswith("-u ")
    if showurl:
        query = query[3:]
    query = ("what is " + query).encode('utf-8')
    url = qurl % urllib.quote(query)
    if showurl:
        self.say(chr(2) + "URL: " + chr(2) + url)
    # Pretend to be a browser; Google serves different markup otherwise.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor)
    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.2) Gecko/2008091620 Firefox/3.0.2', 'Connection':'Keep-Alive', 'Content-Type':'application/x-www-form-urlencoded'}
    page = opener.open(urllib2.Request(url, None, headers)).read()
    # Try the answer-box patterns in order; announce the first hit.
    for pattern in (r"<h[23]\sclass=r[^>]*><b>(.+?)</b></h[23]>", r"onebox/[^>]*>(.*?)<(?:/table|br)"):
        hit = re.search(pattern, page, re.IGNORECASE)
        if hit:
            self.say(decodehtml(removehtml(hit.group(1).strip())))
            return
    self.say("Dunno :S")
def sl(self, input):
    """Queries sl.se for train/bus times.

    Two forms of input are accepted (parsed by one combined regex):
      * "senare" / "tidigare" — page forward/backward through the results
        of the previous query, using POST state cached on self.bot.
      * "<from>, <to>[, <time>|<date time>]" — run a new journey query.

    Scrapes reseplanerare.sl.se and announces header, legs, arrival and
    total travel time as separate messages.
    """
    cmd = input.args
    # later: "sen(are)"/"tidig(are)"; start/stop: comma-separated stops;
    # optional trailing "YYYY-MM-DD HH:MM" (date) or bare "HH:MM" (time).
    m = re.search(r'(?P<later>sen|tidig)(?:are)?|(?P<start>[^,]+),\s*(?P<stop>[^,]+)(?:,\s*?(?:(?P<date>\d{4}-\d{2}-\d{2} [012]?[0-9][.:]?[0-5][0-9])|(?P<time>[01-2]?[0-9][.:]?[0-5][0-9])))?', cmd, re.I)
    if not m:
        raise self.BadInputError()
        return  # NOTE(review): unreachable after the raise above
    # %s slots: from-stop, to-stop, time, search-direction.  The literal
    # %3D survives until urllib.unquote() below turns it into '='.
    baseurl = """http://reseplanerare.sl.se/bin/query.exe/sn?REQ0JourneyStopsS0A=255&S=%s&REQ0JourneyStopsZ0A=255&Z=%s&start=yes&REQ0JourneyTime%3D%s&REQ0HafasSearchForw=%s"""
    nick = input.nick
    if m.group("later"):
        # Paging request: replay the cached form POST with the
        # earlier/later button name captured from the previous result page.
        if not hasattr(self.bot, "sl_posttarget"):
            self.say("Sorry, didn't work!")
            return
        if "sen" in m.group("later"):
            earlat = {self.bot.sl_later:"Åk senare"}
        else:
            earlat = {self.bot.sl_earlier:"Åk tidigare"}
        earlat = urllib.urlencode(earlat)
        req = urllib2.Request(self.bot.sl_posttarget, earlat)
        data = urllib2.urlopen(req).read()
        data = decodehtml(data)
    if m.group("start"):
        # Fresh query.
        start = m.group("start")
        stop = m.group("stop")
        tid = None
        date = None
        if m.group("time"):
            tid = m.group("time")
        if m.group("date"):
            # "YYYY-MM-DD HH:MM" -> date part and time part.
            date = m.group("date")[11:]
            tid = m.group("date")[0:10]
        if tid:
            tpar = 0
        else:
            # No time given: search forward from "now" (HH:MM slice of the
            # local timestamp string).
            tpar = 1
            tid = str(self.localtime())[11:16]
        datestring = ""
        if date:
            # sl.se wants D.M.YY rather than ISO format.
            then = date.split("-")
            then = datetime.date(int(then[0]),int(then[1]),int(then[2]))
            datestring = str(then.day) + "." + str(then.month) + "." + str(then.year)[-2:]
        baseurl = baseurl.encode('utf-8')
        baseurl = urllib.unquote(baseurl)
        # Site expects latin-1 encoded stop names.
        start = start.encode('latin_1')
        stop = stop.encode('latin_1')
        queryurl = baseurl % (urllib.quote(start),urllib.quote(stop),tid,tpar)
        if date:
            queryurl += "&REQ0JourneyDate=" + datestring
        req = urllib2.Request(queryurl)
        # presumably "&nbsp;" replacement — the two arguments look identical
        # here but the first may be a non-breaking space; TODO confirm.
        data = urllib2.urlopen(req).read().replace(" "," ")
        data = decodehtml(data)
        # If the site answered with a disambiguation page ("Vilken ...?")
        # for either field, pick its first suggestion and requery.
        recheck = False
        if re.search(r'<label for="from" class="ErrorText">Vilken', data, re.IGNORECASE):
            recheck = True
            match = re.search(r'<option value="S-0N1">([^[]+)\[', data, re.IGNORECASE)
            if match:
                start = match.group(1).strip()
            else:
                self.say("error1, i sorry")
        if re.search(r'<label for="to" class="ErrorText">Vilken', data, re.IGNORECASE):
            recheck = True
            match = re.search(r'<option value="S-1N1">([^[]+)\[', data, re.IGNORECASE)
            if match:
                stop = match.group(1).strip()
            else:
                self.say("error2 i sorry")
        if recheck:
            # NOTE(review): this date append is immediately discarded by the
            # rebuild on the next line — the two statements look swapped, so
            # the recheck query likely loses the date parameter.
            if date:
                queryurl += "&REQ0JourneyDate=" + datestring
            queryurl = baseurl % (urllib.quote(start),urllib.quote(stop),tid,tpar)
            #req = urllib2.Request(queryurl)
            data = urllib.urlopen(queryurl).read().replace(" "," ")
            data = decodehtml(data)
    # Cache the earlier/later form-button names and the form action URL on
    # the bot so a later "senare"/"tidigare" command can page the results.
    m = re.search(r'tidigare resor."\s*name="(?P<earlier>[^"]+)"', data, re.I | re.DOTALL)
    if m:
        self.bot.sl_earlier = m.group("earlier")
    m = re.search(r'senare resor."\s*name="(?P<later>[^"]+)"', data, re.I | re.DOTALL)
    if m:
        self.bot.sl_later = m.group("later")
    m = re.search(r'tp_results_form"\s*action="(?P<posttarget>[^"]+)"', data, re.I | re.DOTALL)
    if m:
        self.bot.sl_posttarget = m.group("posttarget")
    # Parse the result page: group 1 is the heading, group 2 the itinerary.
    match = re.search(r'<div class="FormAreaLight">.+<h3>([^<]+)</h3>.*-bottom:..?px;">.+?<p>(.*)</p><p>' ,data, re.DOTALL | re.IGNORECASE)
    if match:
        head = match.group(1)
        body = match.group(2)
    else:
        head = body = None
        self.say("machine no work")
        return
    # Short tags (<b>, <p>, <br/>...) become spaces, longer tags become the
    # IRC bold control char (chr(2)).
    body = re.sub("</?[a-z]{1,2} ?/?>"," ",body)
    body = re.sub("</?[a-z]{3,10}>",chr(2),body)
    # Split the itinerary at the fixed Swedish phrases: "Restid" (travel
    # time) footer and "Du är framme" (you have arrived).
    foot = body[body.index("Restid"):]
    body = body[:body.index("Restid")].replace(" "," ")
    b2 = body[body.index("Du är framme"):]
    b1 = body[:body.index("Du är framme")]
    self.say("\x02%s\x02" % head)  # "from X to Y on <date>"
    self.say(b1)  # "take ... from ..."
    self.say(b2)  # "you arrive at ..."
    self.say(foot)  # "travel time NN minutes"
def cleanup(s):
    """Reduce an HTML fragment to plain IRC text.

    <br> tags become newlines, <b>/</b> become the IRC bold control
    character, HTML entities are decoded, and every remaining tag is
    stripped.
    """
    text = re.sub(r'\<br ?\/?\>', chr(10), s)
    text = re.sub(r'</?b>', chr(2), text)
    return re.sub(r'<[^>]+>', '', decodehtml(text))