Example #1
    def reader_render_links(self, dict):
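        # Note: "dict" here shadows the builtin; it is the state dict
        # canto passes to reader hooks. The %N sequences below are
        # assumed to be canto's color escape codes.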
        if not dict["show_links"]:
            return

        dict["content"] += "\n"
        for idx, link in enumerate(dict["links"]):
            if link[2] == "link":
                color = u"%4"
            elif link[2] == "image":
                color = u"%7"
            else:
                color = u"%8"

            dict["content"] += color + u"[" + unicode(idx) + u"] " + \
                    link[0] + u"%1 - " + utility.stripchars(link[1]) + "\n"
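
A minimal sketch of how this hook might be driven (hypothetical data; each
"links" entry is assumed to be a (text, url, type) tuple, and %N a canto
color escape):

    state = {
        "show_links": True,
        "content": u"Article body...",
        "links": [
            (u"Example", u"http://example.com/", "link"),
            (u"Logo", u"http://example.com/logo.png", "image"),
        ],
    }
    # reader.reader_render_links(state) would append a colored,
    # numbered link list to state["content"].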
Example #2
    def run(self):
        curfeed = self.get_curfeed()

        # Determine whether it's been long enough between
        # updates to warrant refetching the feed.
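        # (self.fd.rate is in minutes, while time.time() is in seconds,
        # hence the * 60.)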

        if time.time() - curfeed["canto_update"] < self.fd.rate * 60 and\
                not self.force:
            return

        # Attempt to set the tag, if unspecified, by grabbing
        # it out of the previously downloaded info.

        if not self.fd.base_set:
            if "feed" in curfeed and "title" in curfeed["feed"]:
                replace = lambda x: x or curfeed["feed"]["title"]
                self.fd.tags = [replace(x) for x in self.fd.tags]
                self.fd.base_set = 1
                self.log_func("Updating %s" % self.fd.tags[0])
            else:
                # This is the first time we've gotten this URL,
                # so just use the URL since we don't know the title.

                self.log_func("New feed %s" % self.fd.URL)
        else:
            self.log_func("Updating %s" % self.fd.tags[0])

        # This block sets newfeed to a parsed feed.

        try:
            # Feed from script
            if self.fd.URL.startswith("script:"):
                script = self.spath + "/" + self.fd.URL[7:]
                out = commands.getoutput(script)
                newfeed = feedparser.parse(out)
            # Feed from URL
            else:
                request = urllib2.Request(self.fd.URL)
                request.add_header('User-Agent',\
                    "Canto/%d.%d.%d + http://codezen.org/canto" %\
                    VERSION_TUPLE)

                # Feed from URL w/ password
                if self.fd.username or self.fd.password:
                    mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
                    domain = urlparse.urlparse(self.fd.URL)[1]
                    mgr.add_password(None, domain,\
                            self.fd.username, self.fd.password)

                    # First, we try Basic Authentication
                    auth = urllib2.HTTPBasicAuthHandler(mgr)
                    opener = urllib2.build_opener(auth)
                    try:
                        newfeed = feedparser.parse(opener.open(request))
                    except:
                        # And, failing that, try Digest Authentication
                        auth = urllib2.HTTPDigestAuthHandler(mgr)
                        opener = urllib2.build_opener(auth)
                        newfeed = feedparser.parse(opener.open(request))
                # Feed with no password.
                else:
                    newfeed = feedparser.parse(\
                            feedparser.urllib2.urlopen(request))
        except:
            # Generally an exception is a connection refusal, but in any
            # case we either won't get data or can't trust the data, so
            # just skip processing this feed.

            enc = locale.getpreferredencoding()
            self.log_func("Exception trying to get feed %s : %s" % \
                    (self.fd.URL.encode(enc, "ignore"), sys.exc_info()[1]))

            return

        # I don't know why feedparser doesn't actually throw this
        # since all URLErrors are basically unrecoverable.

        if "bozo_exception" in newfeed:
            if type(newfeed["bozo_exception"]) == urllib2.URLError:
                self.log_func(\
                    "Feedparser exception getting %s : %s, bailing." %\
                    (self.fd.URL, newfeed["bozo_exception"].reason))
                return
            if not newfeed["entries"]:
                self.log_func(\
                    "Feedparser exception, no content in %s : %s, bailing." %\
                    (self.fd.URL, newfeed["bozo_exception"]))
                return

        # Filter out "No Content" message since we apparently have real content

        curfeed["entries"] = [ x for x in curfeed["entries"] if x["id"] !=\
                "canto-internal"]

        # For new feeds whose base tag is still not set, attempt to get a title
        # again.

        if not self.fd.base_set:
            if "feed" not in newfeed or "title" not in newfeed["feed"]:
                self.log_func("Ugh. Defaulting to URL for tag. No guarantees.")
                newfeed["feed"]["title"] = self.fd.URL

            replace = lambda x: x or newfeed["feed"]["title"]
            self.fd.tags = [replace(x) for x in self.fd.tags]

        # Feedparser returns a very nice dict of information. If there
        # was something wrong with the feed (usually encodings being
        # mis-declared or missing tags), it sets bozo_exception.

        # These exceptions are recoverable and their objects are
        # un-picklable, so we log them and remove the value.

        if "bozo_exception" in newfeed:
            self.log_func("Recoverable error in feed %s: %s" %
                          (self.fd.tags[0], newfeed["bozo_exception"]))
            newfeed["bozo_exception"] = None

        # Make state persist between feeds. Currently, this is completely
        # unused, as there's no state information that needs to be
        # propagated. It's a relic from when feeds and tags were the same
        # thing; however, it could be useful for integration with another
        # client / website, so it hasn't been removed.

        newfeed["canto_state"] = curfeed["canto_state"]
        newfeed["canto_update"] = time.time()

        # We can set this here, without checking curfeed.
        # Any migration should be done in the get_curfeed function,
        # when the old data is first loaded.

        newfeed["canto_version"] = VERSION_TUPLE

        # For all content that we would usually use, we escape all of the
        # slashes and other potential escapes, except for the link item,
        # which is escaped in the reader when it is displayed. This is to
        # prevent sending garbled links to the external browser.

        for key in newfeed["feed"]:
            if type(newfeed["feed"][key]) in [unicode, str]:
                newfeed["feed"][key] = utility.stripchars(newfeed["feed"][key])

        for entry in newfeed["entries"]:
            for subitem in ["content", "enclosures"]:
                if subitem in entry:
                    for e in entry[subitem]:
                        for k in e.keys():
                            if type(e[k]) in [unicode, str]:
                                e[k] = utility.stripchars(e[k])

            for key in entry.keys():
                if type(entry[key]) in [unicode, str] and key != "link":
                    entry[key] = utility.stripchars(entry[key])

        for entry in newfeed["entries"]:
            # If the item didn't come with a GUID, then
            # use link and then title as an identifier.

            if not "id" in entry:
                if "link" in entry:
                    entry["id"] = entry["link"]
                elif "title" in entry:
                    entry["id"] = entry["title"]
                else:
                    entry["id"] = None

        # Then search through the current feed to
        # make item state persistent, and loop until
        # it's safe to update on disk.

        new = []
        while True:
            for entry in newfeed["entries"]:
                for centry in curfeed["entries"]:
                    if entry["id"] == centry["id"]:
                        entry["canto_state"] = centry["canto_state"]

                        # The entry is removed so that later it's
                        # not a candidate for being appended to the
                        # end of the feed.

                        curfeed["entries"].remove(centry)
                        break
                else:
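                    # This else belongs to the for loop: it runs only
                    # when the loop finishes without a break, i.e. no
                    # existing entry matched and the item is new.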
                    new.append(entry)

                # Apply default state to genuinely new items.
                if "canto_state" not in entry:
                    entry["canto_state"] = self.fd.tags + [u"*"]

            # Tailor the list to the correct number of items. In canto < 0.7.0,
            # you could specify a keep that was lower than the number of items
            # in the feed. This was simple to implement, but ultimately it
            # caused too much "bounce" for social news feeds. Items get put
            # into the feed, are upvoted enough to be within the first n items,
            # you change their state, they move out of the first n items, are
            # forgotten, then are upvoted again into the first n items and (as
            # far as c-f knows) are treated like brand new items.

            # This will still be a problem if items get taken out of the feed
            # and put back into the feed (and the item isn't in the extra kept
            # items), but then it becomes a site problem, not a reader problem.

            if self.fd.keep and len(newfeed["entries"]) < self.fd.keep:
                newfeed["entries"] += curfeed["entries"]\
                        [:self.fd.keep - len(newfeed["entries"])]

            # Enforce the "never_discard" setting
            # We iterate through the stories and then the tag so that
            # feed order is preserved.
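            # ("unread" is special-cased: the inner continue skips entries
            # already marked "read"; for any other tag, the entry must
            # carry the tag itself in its state to be kept.)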

            for e in curfeed["entries"]:
                for tag in self.cfg.never_discard:
                    if tag == "unread":
                        if "read" in e["canto_state"]:
                            continue
                    elif tag not in e["canto_state"]:
                        continue
                    if e not in newfeed["entries"]:
                        newfeed["entries"].append(e)

            if self.cfg.new_hook:
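                # Call the user's new_hook for each genuinely new entry
                # that survived trimming; the final argument flags the
                # last new item.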
                for entry in [e for e in new if e in newfeed["entries"]]:
                    self.cfg.new_hook(newfeed, entry, entry == new[-1])

            # Dump the output to the new file.

            # Locking and writing is counter-intuitive using fcntl. If you open
            # with "w" and fail to get the lock, the data is still deleted. The
            # solution is to open with "a", get the lock and then truncate the
            # file.

            f = open(self.fpath, "a")
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)

            # The feed was modified out from under us.
            if self.prevtime and self.prevtime != os.stat(self.fpath).st_mtime:
                # Unlock.
                fcntl.flock(f.fileno(), fcntl.LOCK_UN)
                f.close()

                # Reread the state from disk.
                newer_curfeed = self.get_curfeed()

                # There was an actual c-f update done, bail.
                if newer_curfeed["canto_update"] != curfeed["canto_update"]:
                    self.log_func("%s updated already, bailing" %
                                  self.fd.tags[0])
                    break

                # Just a state modification by the client, update and continue.
                else:
                    curfeed = newer_curfeed
                    continue

            # Truncate the file
            f.seek(0, 0)
            f.truncate()

            try:
                # Dump the feed item. It's important to flush afterwards to
                # avoid unlocking the file before all the IO is finished.
                cPickle.dump(newfeed, f)
                f.flush()
            except:
                self.log_func("cPickle dump exception on %s" % self.fpath)
            finally:
                # Unlock.
                fcntl.flock(f.fileno(), fcntl.LOCK_UN)
                f.close()

            # If we managed to write to disk, break out of the while loop and
            # the thread will exit.

            break
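
The locking idiom at the end is worth isolating: opening with "a" means a
failed lock attempt never destroys data, and truncation happens only once
the exclusive lock is held. A minimal standalone sketch of the same pattern
(hypothetical path and payload, Python 2 as above):

    import fcntl
    import cPickle

    def locked_rewrite(path, obj):
        # Open in append mode so the data survives if we block on the lock.
        f = open(path, "a")
        fcntl.flock(f.fileno(), fcntl.LOCK_EX)
        try:
            # Truncate only after the lock is held, then rewrite.
            f.seek(0, 0)
            f.truncate()
            cPickle.dump(obj, f)
            # Flush before unlocking so no buffered IO escapes the lock.
            f.flush()
        finally:
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            f.close()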