def reader_render_links(self, dict):
    if not dict["show_links"]:
        return

    dict["content"] += "\n"
    for idx, link in enumerate(dict["links"]):
        if link[2] == "link":
            color = u"%4"
        elif link[2] == "image":
            color = u"%7"
        else:
            color = u"%8"

        dict["content"] += color + u"[" + unicode(idx) + u"] " +\
                link[0] + u"%1 - " + utility.stripchars(link[1]) + "\n"
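# A minimal usage sketch for the renderer above, assuming each entry
# in dict["links"] is a (text, href, type) tuple where type is "link",
# "image", or anything else; the sample values are purely
# illustrative:
#
#   d = {
#       "show_links": True,
#       "content": u"Story body",
#       "links": [
#           (u"Comments", u"http://example.com/c", "link"),
#           (u"Photo", u"http://example.com/p.png", "image"),
#       ],
#   }
#   self.reader_render_links(d)
#   # d["content"] now ends with color-coded lines like
#   # u"%4[0] Comments%1 - http://example.com/c"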
def run(self):
    curfeed = self.get_curfeed()

    # Determine whether it's been long enough between
    # updates to warrant refetching the feed.
    if time.time() - curfeed["canto_update"] < self.fd.rate * 60 and\
            not self.force:
        return

    # Attempt to set the tag, if unspecified, by grabbing
    # it out of the previously downloaded info.
    if not self.fd.base_set:
        if "feed" in curfeed and "title" in curfeed["feed"]:
            replace = lambda x: x or curfeed["feed"]["title"]
            self.fd.tags = [replace(x) for x in self.fd.tags]
            self.fd.base_set = 1
            self.log_func("Updating %s" % self.fd.tags[0])
        else:
            # This is the first time we've fetched this URL,
            # so just use the URL since we don't know the title.
            self.log_func("New feed %s" % self.fd.URL)
    else:
        self.log_func("Updating %s" % self.fd.tags[0])

    # This block sets newfeed to a parsed feed.
    try:
        # Feed from script
        if self.fd.URL.startswith("script:"):
            script = self.spath + "/" + self.fd.URL[7:]
            out = commands.getoutput(script)
            newfeed = feedparser.parse(out)

        # Feed from URL
        else:
            request = urllib2.Request(self.fd.URL)
            request.add_header('User-Agent',
                    "Canto/%d.%d.%d + http://codezen.org/canto" %
                    VERSION_TUPLE)

            # Feed from URL w/ password
            if self.fd.username or self.fd.password:
                mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
                domain = urlparse.urlparse(self.fd.URL)[1]
                mgr.add_password(None, domain,
                        self.fd.username, self.fd.password)

                # First, we try Basic Authentication.
                auth = urllib2.HTTPBasicAuthHandler(mgr)
                opener = urllib2.build_opener(auth)
                try:
                    newfeed = feedparser.parse(opener.open(request))
                except:
                    # And, failing that, try Digest Authentication.
                    auth = urllib2.HTTPDigestAuthHandler(mgr)
                    opener = urllib2.build_opener(auth)
                    newfeed = feedparser.parse(opener.open(request))

            # Feed with no password.
            else:
                newfeed = feedparser.parse(
                        feedparser.urllib2.urlopen(request))
    except:
        # Generally an exception is a connection refusal, but in any
        # case we either won't get data or can't trust the data, so
        # just skip processing this feed.
        enc = locale.getpreferredencoding()
        self.log_func("Exception trying to get feed %s : %s" %
                (self.fd.URL.encode(enc, "ignore"), sys.exc_info()[1]))
        return

    # I don't know why feedparser doesn't actually throw this,
    # since all URLErrors are basically unrecoverable.
    if "bozo_exception" in newfeed:
        if type(newfeed["bozo_exception"]) == urllib2.URLError:
            self.log_func(
                    "Feedparser exception getting %s : %s, bailing." %
                    (self.fd.URL, newfeed["bozo_exception"].reason))
            return
        if not len(newfeed["entries"]):
            self.log_func(
                    "Feedparser exception, no content in %s : %s, bailing." %
                    (self.fd.URL, newfeed["bozo_exception"]))
            return

    # Filter out the "No Content" message, since we apparently have
    # real content now.
    curfeed["entries"] = [x for x in curfeed["entries"]
            if x["id"] != "canto-internal"]

    # For new feeds whose base tag is still not set, attempt to get a
    # title again.
    if not self.fd.base_set:
        if "feed" not in newfeed or "title" not in newfeed["feed"]:
            self.log_func("Ugh. Defaulting to URL for tag. No guarantees.")
            newfeed["feed"]["title"] = self.fd.URL

        replace = lambda x: x or newfeed["feed"]["title"]
        self.fd.tags = [replace(x) for x in self.fd.tags]

    # Feedparser returns a very nice dict of information. If there was
    # something wrong with the feed (usually encodings being
    # mis-declared, or missing tags), it sets bozo_exception. These
    # exceptions are recoverable and their objects are un-picklable,
    # so we log the error and remove the value.
if "bozo_exception" in newfeed: self.log_func("Recoverable error in feed %s: %s" % (self.fd.tags[0], newfeed["bozo_exception"])) newfeed["bozo_exception"] = None # Make state persist between feeds. Currently, this is completely # unused, as there's no state information that needs to be propagated. # This is a relic from when feeds and tags were the same thing, however # it could be useful when doing integration with another client / # website and thus, hasn't been removed. newfeed["canto_state"] = curfeed["canto_state"] newfeed["canto_update"] = time.time() # We can set this here, without checking curfeed. # Any migration should be done in the get_curfeed function, # when the old data is first loaded. newfeed["canto_version"] = VERSION_TUPLE # For all content that we would usually use, we escape all of the # slashes and other potential escapes, except for the link item, # which is escaped in the reader when it is displayed. This is to # prevent sending garbeled links to the exteranl browser. for key in newfeed["feed"]: if type(newfeed["feed"][key]) in [unicode, str]: newfeed["feed"][key] = utility.stripchars(newfeed["feed"][key]) for entry in newfeed["entries"]: for subitem in ["content", "enclosures"]: if subitem in entry: for e in entry[subitem]: for k in e.keys(): if type(e[k]) in [unicode, str]: e[k] = utility.stripchars(e[k]) for key in entry.keys(): if type(entry[key]) in [unicode, str] and key != "link": entry[key] = utility.stripchars(entry[key]) for entry in newfeed["entries"]: # If the item didn't come with a GUID, then # use link and then title as an identifier. if not "id" in entry: if "link" in entry: entry["id"] = entry["link"] elif "title" in entry: entry["id"] = entry["title"] else: entry["id"] = None # Then search through the current feed to # make item state persistent, and loop until # it's safe to update on disk. new = [] while 1: for entry in newfeed["entries"]: for centry in curfeed["entries"]: if entry["id"] == centry["id"]: entry["canto_state"] = centry["canto_state"] # The entry is removed so that later it's # not a candidate for being appended to the # end of the feed. curfeed["entries"].remove(centry) break else: new.append(entry) # Apply default state to genuinely new items. if "canto_state" not in entry: entry["canto_state"] = self.fd.tags + [u"*"] # Tailor the list to the correct number of items. In canto < 0.7.0, # you could specify a keep that was lower than the number of items # in the feed. This was simply done, but ultimately it caused too # much "bounce" for social news feeds. Items get put into the feed, # are upvoted enough to be within the first n items, you change # their state, they move out of the first n items, are forgotten, # then are upvoted again into the first n item and (as far as c-f # knows) are treated like brand new items. # This will still be a problem if items get taken out of the feed # and put back into the feed (and the item isn't in the extra kept # items), but then it becomes a site problem, not a reader problem. if self.fd.keep and len(newfeed["entries"]) < self.fd.keep: newfeed["entries"] += curfeed["entries"]\ [:self.fd.keep - len(newfeed["entries"])] # Enforce the "never_discard" setting # We iterate through the stories and then the tag so that # feed order is preserved. 
        for e in curfeed["entries"]:
            for tag in self.cfg.never_discard:
                if tag == "unread":
                    if "read" in e["canto_state"]:
                        continue
                elif tag not in e["canto_state"]:
                    continue

                if e not in newfeed["entries"]:
                    newfeed["entries"].append(e)

        if self.cfg.new_hook:
            for entry in [e for e in new if e in newfeed["entries"]]:
                self.cfg.new_hook(newfeed, entry, entry == new[-1])

        # Dump the output to the new file.

        # Locking and writing are counter-intuitive with fcntl. If you
        # open with "w" and fail to get the lock, the data is still
        # deleted. The solution is to open with "a", get the lock, and
        # then truncate the file.
        f = open(self.fpath, "a")
        fcntl.flock(f.fileno(), fcntl.LOCK_EX)

        # The feed was modified out from under us.
        if self.prevtime and self.prevtime != os.stat(self.fpath).st_mtime:
            # Unlock.
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            f.close()

            # Reread the state from disk.
            newer_curfeed = self.get_curfeed()

            # There was an actual c-f update done, bail.
            if newer_curfeed["canto_update"] != curfeed["canto_update"]:
                self.log_func("%s updated already, bailing" %
                        self.fd.tags[0])
                break

            # Just a state modification by the client; update and
            # continue.
            else:
                curfeed = newer_curfeed
                continue

        # Truncate the file.
        f.seek(0, 0)
        f.truncate()

        try:
            # Dump the feed item. It's important to flush afterwards
            # to avoid unlocking the file before all of the I/O is
            # finished.
            cPickle.dump(newfeed, f)
            f.flush()
        except:
            self.log_func("cPickle dump exception on %s" % self.fpath)
        finally:
            # Unlock.
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            f.close()

        # If we managed to write to disk, break out of the while loop
        # and the thread will exit.
        break
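# The lock-then-truncate pattern used above, restated as a minimal
# standalone sketch. _locked_pickle_dump is a hypothetical helper (it
# isn't referenced anywhere else); it assumes the same fcntl and
# cPickle imports the code above relies on. Opening with "a" means a
# failed lock can't clobber existing data, whereas "w" would truncate
# on open, before the lock is held.
def _locked_pickle_dump(path, obj):
    f = open(path, "a")
    fcntl.flock(f.fileno(), fcntl.LOCK_EX)
    try:
        # Only rewind and truncate once the exclusive lock is held.
        f.seek(0, 0)
        f.truncate()
        cPickle.dump(obj, f)
        # Flush before unlocking so no buffered data hits the file
        # after another process has taken the lock.
        f.flush()
    finally:
        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
        f.close()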