def checksites(self, pattern=None):
    """Poll stored RSS sites and announce new entries to each site's channel.

    pattern -- optional substring filter on site.url; all sites are checked
               when it is None/empty. When truthy (an explicit user-triggered
               check) progress messages are emitted as well.

    All IRC output goes through reactor.callFromThread because this runs
    outside the reactor thread.
    """
    for site in self.storage['sites']:
        # An empty/None pattern matches every URL.
        if (pattern or "") in site.url:
            try:
                if pattern:
                    # Only announce progress for explicit checks.
                    reactor.callFromThread(self.msg, site.chan, "Checking %s..." % site.url)
                res = site.check()
                if res:
                    if pattern:
                        reactor.callFromThread(self.msg, site.chan, "Found %d new entries:" % len(res))
                    # Announce oldest entry first.
                    res.reverse()
                    for entry in res:
                        reactor.callFromThread(self.msg, site.chan, "[RSS] \x02%s\x02 - \x1f%s" % (decodehtml(entry.get('title', '')), entry.get('link', '')))
                        msg = entry.get('description', '')
                        # Raw string: \s must reach the regex engine, not be a
                        # Python string escape.
                        msg = re.sub(r"<br\s?/?>", "\n", msg)
                        msg = decodehtml(removehtml(msg))
                        if site.limit:
                            # Cap the description at site.limit lines.
                            msg = "\n".join(msg.split("\n")[:site.limit])
                        reactor.callFromThread(self.msg, site.chan, msg)
                else:
                    if pattern:
                        reactor.callFromThread(self.msg, site.chan, "No new entries found.")
            except Exception as e:
                # Report per-site failures to the channel instead of aborting
                # the whole sweep. (`as` syntax is valid on Python 2.6+.)
                reactor.callFromThread(self.msg, site.chan, "\x02RSS:\x02 Error while checking %s. (%s)!" % (site.url, e))
def google(self, input):
    """Perform a web search using the Google search engine.

    Supports -d/-r/--results N (clamped to 1..10) to control how many
    results are shown. Raises self.BadInputError when no query remains
    after option parsing.
    """
    args = input.args or ""
    parser = self.OptionParser()
    parser.add_option("-d", "-r", "--results", dest="results", default=1, type="int")
    (options, args) = parser.parse_args(args.split())
    if not args:
        raise self.BadInputError()
    query = " ".join(args).encode('utf-8')
    # Clamp the requested result count to the 1..10 range the API returns.
    if options.results < 1:
        options.results = 1
    elif options.results > 10:
        options.results = 10
    try:
        data = urllib.urlopen('http://www.google.com/uds/GwebSearch?callback=GwebSearch.RawCompletion&context=0&lstkp=0&hl=en&key=ABQIAAAAeBvxXUmueP_8_kTINo0H4hSKL4HoBFFxfS_vfvgFpLqAt5GPWRTHDAESci2RYvZRkcpsYXapXjZWKA&v=1.0&rsz=large&q=%s' % urllib.quote(query)).read()
    except IOError:
        self.say("Error: Unable to establish a connection to google.com")
        return
    data = unescapeuni(data)
    data = decodehtml(data)
    # FIX: `matches` used to be assigned only when this regex hit, causing an
    # UnboundLocalError below when the count was missing from the response.
    m = re.search('estimatedResultCount":"([^"]+)"', data)
    matches = m.group(1) if m else "unknown"
    m = re.findall(r'"url":"([^"]*)".*?"titleNoFormatting":"([^"]*)","content":"([^"]*)"', data, re.IGNORECASE)
    if m:
        if len(m) < options.results:
            options.results = len(m)
        if options.results == 1:
            # Single result: show title, URL, match count, then the snippet.
            self.say('\x02%s\x02 - ( \x1f%s\x1f ) [%s matches]' % (removehtml(m[0][1]), urllib.unquote(m[0][0]), matches))
            self.say(removehtml(m[0][2]))
        else:
            self.say('Showing the first \x02%s\x02 of \x02%s\x02 matches' % (options.results, matches))
            for x in range(options.results):
                self.say('\x02%s\x02 - ( \x1f%s\x1f )' % (removehtml(m[x][1]), urllib.unquote(m[x][0])))
    else:
        # FIX: this branch previously called phenny.say, a leftover from the
        # phenny bot this was ported from -- `phenny` is not defined here and
        # raised a NameError whenever a search had no results.
        self.say('Your search for \x02%s\x02 did not return any results.' % input.args)
def checksites(self, pattern=None, savefunc=None):
    """Poll stored RSS sites and PRIVMSG new entries to each site's channel.

    pattern  -- optional substring filter on site.url; all sites are checked
                when it is None/empty.
    savefunc -- accepted for interface compatibility; currently unused in
                this body (NOTE(review): presumably meant to persist feed
                state after a check -- confirm against callers).
    """
    for site in self.rss_db:
        # An empty/None pattern matches every URL.
        if (pattern or "") in site.url:
            try:
                res = site.check()
                if res:
                    # Announce oldest entry first.
                    res.reverse()
                    for entry in res:
                        self.sendLine("PRIVMSG " + site.chan + " :[RSS] \x02%s\x02 - \x1f%s" % (entry.get('title', ''), entry.get('link', '')))
                        msg = entry.get('description', '')
                        # Raw string: \s must reach the regex engine, not be a
                        # Python string escape.
                        msg = re.sub(r"<br\s?/?>", "\n", msg)
                        msg = removehtml(msg).split("\n")
                        # IRC messages cannot contain newlines; send one line each.
                        for line in msg:
                            self.sendLine("PRIVMSG %s :%s" % (site.chan, line))
            except Exception as e:
                # Report per-site failures to the channel instead of aborting
                # the whole sweep. (`as` syntax is valid on Python 2.6+.)
                self.sendLine("PRIVMSG " + site.chan + " :\x02RSS:\x02 Error while checking %s. (%s)!" % (site.url, e))
def isitdown(self, input):
    """Check if a website is down using downforeveryoneorjustme.com"""
    if not input.args:
        raise self.BadInputError()
    target = urllib.quote(input.args.encode('utf-8'))
    try:
        page = urllib.urlopen('http://downforeveryoneorjustme.com/%s' % target).read()
    except IOError:
        self.say('Error: Unable to establish a connection to downforeveryoneorjustme.com.')
        return
    # Collapse the page onto a single line so the verdict regex can span it.
    page = page.replace('\r', '').replace('\n', '')
    verdict = re.search(r'<div id="container">(?P<resp>.*?)(?:Check another|Try again)', page, re.IGNORECASE)
    if verdict is None:
        self.say('Error: Could not parse data. Has the site layout changed?')
        return
    # Turn the site's answer link into IRC bold (\x02), strip remaining markup.
    answer = re.sub(r'(?:<a href[^>]*>|</a>)', '\x02', verdict.group("resp"))
    self.say(removehtml(answer).strip())
def whatis(self, input):
    """Performs a "what is <argument>" query to Google and displays the result"""
    if not input.args:
        raise self.BadInputError()
    query = input.args.strip()
    # A leading "-u " flag asks us to echo the request URL back first.
    showurl = query.startswith("-u ")
    if showurl:
        query = query[3:]
    query = ("what is " + query).encode('utf-8')
    url = qurl % urllib.quote(query)
    if showurl:
        self.say(chr(2) + "URL: " + chr(2) + url)
    # Pretend to be a desktop Firefox so Google serves the scrapable layout;
    # the cookie processor keeps any session cookies Google hands back.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.2) Gecko/2008091620 Firefox/3.0.2',
               'Connection': 'Keep-Alive',
               'Content-Type': 'application/x-www-form-urlencoded'}
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor)
    page = opener.open(urllib2.Request(url, None, headers)).read()
    # First pattern: the bolded answer heading; second: Google's "onebox" widget.
    for pattern in (r"<h[23]\sclass=r[^>]*><b>(.+?)</b></h[23]>",
                    r"onebox/[^>]*>(.*?)<(?:/table|br)"):
        hit = re.search(pattern, page, re.IGNORECASE)
        if hit:
            self.say(decodehtml(removehtml(hit.group(1).strip())))
            return
    self.say("Dunno :S")
def rss(self, input):
    """Checks RSS feeds periodically and notifies the channel when a new post is added."""
    # Feed management only makes sense inside a channel, not in a private query.
    if not input.sender.startswith('#'):
        return
    cmd = input.args or ""
    # Sub-command switches: -r remove, -c check now, -l list, -d display last N.
    parser = self.OptionParser()
    parser.add_option("-r", "--remove", dest="remove")
    parser.add_option("-c", "--check", dest="check")
    parser.add_option("-l", "--list", dest="list", action="store_true")
    parser.add_option("-d", "--display", dest="display", type="int")
    options, args = parser.parse_args(cmd.split())
    if options.remove:
        # Remove every feed whose URL contains the given substring, then persist.
        # NOTE(review): this mutates self.rss_db while iterating over it, which
        # can skip the element following each removal -- iterate a copy to fix.
        for site in self.rss_db:
            if options.remove in site.url:
                self.say("Removing: %s" % site.url)
                self.rss_db.remove(site)
        savedb(self.rss_filename, self.rss_db)
    elif options.display:
        # -d N: show the latest N entries of every feed matching the pattern in args.
        if not args:
            self.say("\x02Error:\x02 A pattern must be provided when using switch -d")
            return
        for site in self.rss_db:
            if "".join(args) in site.url:
                entries = site.getentries(options.display)
                for entry in entries:
                    self.say("[RSS] \x02%s\x02 - \x1f%s" % (entry.get('title', ''), entry.get('link', '')))
                    msg = entry.get('description', '')
                    msg = re.sub("<br\s?/?>", "\n", msg)
                    msg = removehtml(msg).split("\n")[:3] # print max 3 lines of description
                    for line in msg:
                        self.say(line)
    elif options.check:
        # checksites is a module-level helper that takes the bot instance as `self`.
        checksites(self, options.check)
    elif options.list:
        # List feeds matching the (optional) pattern in args.
        for site in self.rss_db:
            if ("".join(args) or "") in site.url:
                self.say("Added by \x02%s\x02 on \x02%s\x02:" % (site.added_by, site.added_on))
                self.say( "\x02Url: \x02 %s" % site.url)
        if not self.rss_db:
            self.say("No feeds added yet!")
    elif args:
        # No switch given: treat the arguments as a new feed URL to add.
        url = " ".join(args)
        for site in self.rss_db:
            if url == site.url:
                self.say("Feed already exists, try using the -l switch to check for it.")
                return
        try:
            site = RSS(url, input.nick, input.sender)
        except Exception, e:
            self.say("Error: %s" % e)
            return
        try:
            # Validate the feed once before accepting it.
            if site.check() == None:
                self.say("\x02Error:\x02 Unable to parse the feed at %s." % url)
                return
        except Exception, e:
            self.say("Error: %s" % e)
            return
        # NOTE(review): the validated `site` is never appended to self.rss_db
        # or saved here -- either the add path is incomplete or the rest of
        # this function lies outside the visible source. TODO confirm.