Example #1
    def reader_render_links(self, dict):
        # Nothing to do unless the reader is configured to show links.
        if not dict["show_links"]:
            return

        # Append one line per link, color-coded by link type.
        dict["content"] += "\n"
        for idx, link in enumerate(dict["links"]):
            if link[2] == "link":
                color = u"%4"
            elif link[2] == "image":
                color = u"%7"
            else:
                color = u"%8"

            dict["content"] += color + u"[" + unicode(idx) + u"] " + \
                    link[0] + u"%1 - " + utility.stripchars(link[1]) + "\n"
Example #2
    def run(self):
        curfeed = self.get_curfeed()

        # Determine whether it's been long enough between
        # updates to warrant refetching the feed.

        if time.time() - curfeed["canto_update"] < self.fd.rate * 60 and\
                not self.force:
            return

        # Attempt to set the tag, if unspecified, by grabbing
        # it out of the previously downloaded info.

        if not self.fd.base_set:
            if "feed" in curfeed and "title" in curfeed["feed"]:
                replace = lambda x: x or curfeed["feed"]["title"]
                self.fd.tags = [replace(x) for x in self.fd.tags]
                self.fd.base_set = 1
                self.log_func("Updating %s" % self.fd.tags[0])
            else:
                # This is the first time we've gotten this URL,
                # so just use the URL since we don't know the title.

                self.log_func("New feed %s" % self.fd.URL)
        else:
            self.log_func("Updating %s" % self.fd.tags[0])

        # This block sets newfeed to a parsed feed.

        try:
            # Feed from script
            if self.fd.URL.startswith("script:"):
                script = self.spath + "/" + self.fd.URL[7:]
                out = commands.getoutput(script)
                newfeed = feedparser.parse(out)
            # Feed from URL
            else:
                request = urllib2.Request(self.fd.URL)
                request.add_header('User-Agent',\
                    "Canto/%d.%d.%d + http://codezen.org/canto" %\
                    VERSION_TUPLE)

                # Feed from URL w/ password
                if self.fd.username or self.fd.password:
                    mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
                    domain = urlparse.urlparse(self.fd.URL)[1]
                    mgr.add_password(None, domain,\
                            self.fd.username, self.fd.password)

                    # First, we try Basic Authentication
                    auth = urllib2.HTTPBasicAuthHandler(mgr)
                    opener = urllib2.build_opener(auth)
                    try:
                        newfeed = feedparser.parse(opener.open(request))
                    except:
                        # And, failing that, try Digest Authentication
                        auth = urllib2.HTTPDigestAuthHandler(mgr)
                        opener = urllib2.build_opener(auth)
                        newfeed = feedparser.parse(opener.open(request))
                # Feed with no password.
                else:
                    newfeed = feedparser.parse(\
                            feedparser.urllib2.urlopen(request))
        except:
            # Generally an exception is a connection refusal, but in any
            # case we either won't get data or can't trust the data, so
            # just skip processing this feed.

            enc = locale.getpreferredencoding()
            self.log_func("Exception trying to get feed %s : %s" % \
                    (self.fd.URL.encode(enc, "ignore"), sys.exc_info()[1]))

            return

        # I don't know why feedparser doesn't actually throw this
        # since all URLErrors are basically unrecoverable.

        if "bozo_exception" in newfeed:
            if type(newfeed["bozo_exception"]) == urllib2.URLError:
                self.log_func(\
                    "Feedparser exception getting %s : %s, bailing." %\
                    (self.fd.URL, newfeed["bozo_exception"].reason))
                return
            if not len(newfeed["entries"]):
                self.log_func(\
                    "Feedparser exception, no content in %s : %s, bailing." %\
                    (self.fd.URL, newfeed["bozo_exception"]))
                return

        # Filter out "No Content" message since we apparently have real content

        curfeed["entries"] = [ x for x in curfeed["entries"] if x["id"] !=\
                "canto-internal"]

        # For new feeds whose base tag is still not set, attempt to get a title
        # again.

        if not self.fd.base_set:
            if "feed" not in newfeed or "title" not in newfeed["feed"]:
                self.log_func("Ugh. Defaulting to URL for tag. No guarantees.")
                newfeed["feed"]["title"] = self.fd.URL

            replace = lambda x: x or newfeed["feed"]["title"]
            self.fd.tags = [replace(x) for x in self.fd.tags]

        # Feedparser returns a very nice dict of information.
        # If there was something wrong with the feed (usually encodings
        # being mis-declared or missing tags), it sets
        # bozo_exception.

        # These exceptions are recoverable, and their objects are
        # un-picklable, so we log the error and remove the value.

        if "bozo_exception" in newfeed:
            self.log_func("Recoverable error in feed %s: %s" %
                          (self.fd.tags[0], newfeed["bozo_exception"]))
            newfeed["bozo_exception"] = None

        # Make state persist between feeds. Currently, this is completely
        # unused, as there's no state information that needs to be propagated.
        # It's a relic from when feeds and tags were the same thing; however,
        # it could be useful for integration with another client or website,
        # so it hasn't been removed.

        newfeed["canto_state"] = curfeed["canto_state"]
        newfeed["canto_update"] = time.time()

        # We can set this here, without checking curfeed.
        # Any migration should be done in the get_curfeed function,
        # when the old data is first loaded.

        newfeed["canto_version"] = VERSION_TUPLE

        # For all content that we would usually use, we escape all of the
        # slashes and other potential escapes, except for the link item,
        # which is escaped in the reader when it is displayed. This is to
        # prevent sending garbled links to the external browser.

        for key in newfeed["feed"]:
            if type(newfeed["feed"][key]) in [unicode, str]:
                newfeed["feed"][key] = utility.stripchars(newfeed["feed"][key])

        for entry in newfeed["entries"]:
            for subitem in ["content", "enclosures"]:
                if subitem in entry:
                    for e in entry[subitem]:
                        for k in e.keys():
                            if type(e[k]) in [unicode, str]:
                                e[k] = utility.stripchars(e[k])

            for key in entry.keys():
                if type(entry[key]) in [unicode, str] and key != "link":
                    entry[key] = utility.stripchars(entry[key])

        for entry in newfeed["entries"]:
            # If the item didn't come with a GUID, then
            # use link and then title as an identifier.

            if not "id" in entry:
                if "link" in entry:
                    entry["id"] = entry["link"]
                elif "title" in entry:
                    entry["id"] = entry["title"]
                else:
                    entry["id"] = None

        # Then search through the current feed to
        # make item state persistent, and loop until
        # it's safe to update on disk.

        new = []
        while 1:
            for entry in newfeed["entries"]:
                for centry in curfeed["entries"]:
                    if entry["id"] == centry["id"]:
                        entry["canto_state"] = centry["canto_state"]

                        # The entry is removed so that later it's
                        # not a candidate for being appended to the
                        # end of the feed.

                        curfeed["entries"].remove(centry)
                        break
                else:
                    new.append(entry)

                # Apply default state to genuinely new items.
                if "canto_state" not in entry:
                    entry["canto_state"] = self.fd.tags + [u"*"]

            # Tailor the list to the correct number of items. In canto < 0.7.0,
            # you could specify a keep that was lower than the number of items
            # in the feed. This was simple to do, but ultimately it caused too
            # much "bounce" for social news feeds. Items get put into the feed,
            # are upvoted enough to be within the first n items, you change
            # their state, they move out of the first n items, are forgotten,
            # then are upvoted again into the first n items and (as far as c-f
            # knows) are treated like brand new items.

            # This will still be a problem if items get taken out of the feed
            # and put back into the feed (and the item isn't in the extra kept
            # items), but then it becomes a site problem, not a reader problem.

            if self.fd.keep and len(newfeed["entries"]) < self.fd.keep:
                newfeed["entries"] += curfeed["entries"]\
                        [:self.fd.keep - len(newfeed["entries"])]

            # Enforce the "never_discard" setting.
            # We iterate through the stories and then the tags so that
            # feed order is preserved.

            for e in curfeed["entries"]:
                for tag in self.cfg.never_discard:
                    if tag == "unread":
                        if "read" in e["canto_state"]:
                            continue
                    elif tag not in e["canto_state"]:
                        continue
                    if e not in newfeed["entries"]:
                        newfeed["entries"].append(e)

            if self.cfg.new_hook:
                for entry in [e for e in new if e in newfeed["entries"]]:
                    self.cfg.new_hook(newfeed, entry, entry == new[-1])

            # Dump the output to the new file.

            # Locking and writing is counter-intuitive using fcntl. If you open
            # with "w" and fail to get the lock, the data is still deleted. The
            # solution is to open with "a", get the lock and then truncate the
            # file.

            f = open(self.fpath, "a")
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)

            # The feed was modified out from under us.
            if self.prevtime and self.prevtime != os.stat(self.fpath).st_mtime:
                # Unlock.
                fcntl.flock(f.fileno(), fcntl.LOCK_UN)
                f.close()

                # Reread the state from disk.
                newer_curfeed = self.get_curfeed()

                # There was an actual c-f update done, bail.
                if newer_curfeed["canto_update"] != curfeed["canto_update"]:
                    self.log_func("%s updated already, bailing" %
                                  self.fd.tags[0])
                    break

                # Just a state modification by the client, update and continue.
                else:
                    curfeed = newer_curfeed
                    continue

            # Truncate the file
            f.seek(0, 0)
            f.truncate()

            try:
                # Dump the feed item. It's important to flush afterwards to
                # avoid unlocking the file before all the IO is finished.
                cPickle.dump(newfeed, f)
                f.flush()
            except:
                self.log_func("cPickle dump exception on %s" % self.fpath)
            finally:
                # Unlock.
                fcntl.flock(f.fileno(), fcntl.LOCK_UN)
                f.close()

            # If we managed to write to disk, break out of the while loop and
            # the thread will exit.

            break
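
A few of the idioms in run() are worth isolating as standalone sketches. The first is the authentication fallback: try HTTP Basic auth, and if the server rejects it, retry the same request with Digest auth. A self-contained Python 2 sketch of that pattern (the open_with_auth name is hypothetical, not canto's):

    import urllib2, urlparse

    def open_with_auth(url, username, password):
        request = urllib2.Request(url)
        # Register the credentials for the feed's host, any realm.
        mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        domain = urlparse.urlparse(url)[1]
        mgr.add_password(None, domain, username, password)

        # First, try Basic Authentication ...
        try:
            opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(mgr))
            return opener.open(request)
        # ... and, failing that, try Digest Authentication.
        except Exception:
            opener = urllib2.build_opener(urllib2.HTTPDigestAuthHandler(mgr))
            return opener.open(request)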
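The second is the state merge: each new entry inherits canto_state from its old copy (matched by id), matched old copies are removed so the keep logic can't re-append them, and genuinely new items get the default state. Roughly, assuming the same entry layout (merge_state is a hypothetical name):

    def merge_state(new_entries, old_entries, default_state):
        new = []
        for entry in new_entries:
            for centry in old_entries:
                if entry["id"] == centry["id"]:
                    # Carry the old state over, and drop the old copy so it
                    # isn't also appended to the end of the feed later.
                    entry["canto_state"] = centry["canto_state"]
                    old_entries.remove(centry)
                    break
            else:
                new.append(entry)
            if "canto_state" not in entry:
                entry["canto_state"] = list(default_state)
        return new  # the genuinely new items, e.g. for new_hook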
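Finally, the locking idiom the comments call counter-intuitive: opening with "w" truncates the file before the lock is held, so the file is opened with "a", locked, and only then truncated. As a standalone sketch (locked_overwrite is my name, not canto's):

    import fcntl

    def locked_overwrite(path, write_func):
        # "a" leaves existing data intact until we actually hold the lock;
        # "w" would have truncated the file immediately on open.
        f = open(path, "a")
        try:
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)
            # Only now that we hold the lock is it safe to truncate.
            f.seek(0, 0)
            f.truncate()
            write_func(f)
            f.flush()   # finish all IO before the lock is released
        finally:
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            f.close()

run() adds one more wrinkle on top of this: after taking the lock, it compares the file's st_mtime against the value recorded when the feed was first read, and re-merges state instead of writing if another process got there first.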