def make_planet(subdir, output_dir=None, name="", user="", email=""):
    """
    Makes a planet on disk and in the db, copying the skeleton directory
    on disk.  Does not seed the planet with default values for owner or
    email.
    """
    if not validate_input(subdir):
        raise BadSubdirNameError(subdir)
    if not output_dir:
        output_dir = opt['output_dir']
    path = os.path.join(output_dir, subdir)

    with our_db('planets') as db:
        if os.path.exists(path) and subdir not in db:
            log.debug("Exists on disk but not in db, attempting to delete")
            shutil.rmtree(path)

    try:
        shutil.copytree(opt['new_planet_dir'], path, symlinks=True)
    except OSError, errstr:
        if os.path.exists(path):
            msg = "%s planet already exists. Please choose another subdirectory name." % subdir
            err.add(msg)
            log.info(msg)
            return False
        err.add("Couldn't create planet: %s" % errstr)
        return False
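# Usage sketch: the arguments below ("physics", "Planet Physics", "Ada",
# "ada@example.com") are purely illustrative, and the snippet above is
# truncated after its error handling, so no particular return value on
# success is assumed here.  Omitting output_dir falls back to
# opt['output_dir'], as shown above.
make_planet("physics", name="Planet Physics",
            user="Ada", email="ada@example.com")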
def dump_cache(self):
    for url in self.feeds:
        print url
        with our_db("cache") as db:
            cache = db[url]
        print json.dumps(cache)
def not_in_db_test(s):
    # Remove the planet's db record out from under it, then delete the planet
    # object and try to reconstruct it from the now-missing record.
    name = "delete_test"
    p = make_temp_planet(name, True)
    with our_db('planets') as db:
        del db[name]
    p.delete()
    p = Planet(direc=name)
def delete_unused_feeds(self):
    planets = []
    with our_db('planets') as db:
        for k in db.keys():
            planets.append(Planet(db[k]))

    # Collect every feed url still referenced by some planet.
    feed_urls = set()
    for p in planets:
        for f in p.feeds:
            feed_urls.add(f)

    with our_db('cache') as db:
        for k in db.keys():
            if k not in feed_urls:
                del db[k]
                log.debug("Removed %s from cache." % k)
def delete(self):
    with our_db('planets') as db:
        del db[self.direc]
    try:
        shutil.rmtree(os.path.join(cfg.OUTPUT_DIR, self.direc))
    except OSError:
        pass
    log.info("Deleted planet: %s" % self.direc)
def __init__(self, *args, **kwargs):
    if "direc" in kwargs:
        with our_db("planets") as db:
            self.load_dict(db[kwargs["direc"]])
    elif args and isinstance(args[0], basestring):
        self.load_json(args[0])
    elif args and isinstance(args[0], dict):
        self.load_dict(args[0])
    else:
        self.load_dict(kwargs)
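# The constructor dispatches on its arguments; these call shapes are the ones
# used elsewhere in this code (Planet(direc=name) in the tests, Planet(db[k])
# in load()).  The values here are illustrative only:
p = Planet(direc="physics")         # loads the record for "physics" from the planets db
p = Planet('{"direc": "physics"}')  # a JSON string goes through load_json
p = Planet({"direc": "physics"})    # an already-decoded dict goes through load_dict
p = Planet(name="Planet Physics")   # bare keyword args (without "direc") are treated as a dict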
def save(self, update_config_timestamp=False, ignore_missing_dir=False):
    output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
    if not ignore_missing_dir and not os.path.exists(output_dir):
        log.info("Can't find %s directory. Skipping save." % output_dir)
        return
    log.debug("Saving the planet! %s" % self.direc)
    if update_config_timestamp:
        self.last_config_change = time.time()
    with our_db("planets") as db:
        #db[self.direc.encode("utf-8")] = self.serializable()
        db[self.direc] = self.serializable()
def update_feed(self, url):
    """Download feed if it's out of date"""
    force_check = opt['force_check']
    with our_db('cache') as db:
        try:
            cache = db[url]
        except KeyError:
            log.info("Can't find %s in cache. Making default." % url)
            cache = {'data': '', 'last_downloaded': 0, 'dload_fail': False}
            force_check = True
        except json.decoder.JSONDecodeError, e:
            log.debug("Json error on updating url %s: %s" % (url, e))
            cache = {'data': '', 'last_downloaded': 0, 'dload_fail': False}
            force_check = True
def update_feed(self, url):
    """Download feed if it's out of date"""
    force_check = opt["force_check"]
    with our_db("cache") as db:
        try:
            #cache = db[url.encode("utf-8")]
            cache = db[url]
        except KeyError:
            log.info("Can't find %s in cache. Making default." % url)
            cache = {"data": "", "last_downloaded": 0, "dload_fail": False}
            force_check = True
        except json.decoder.JSONDecodeError, e:
            log.debug("Json error on updating url %s: %s" % (url, e))
            cache = {"data": "", "last_downloaded": 0, "dload_fail": False}
            force_check = True
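# Both update_feed snippets above are cut off before any freshness check.
# A minimal, hypothetical sketch of such a check, using only the cache fields
# created above ('last_downloaded', force_check) plus a made-up CHECK_INTERVAL
# constant that is not part of the original code:
#
#   if force_check or time.time() - cache["last_downloaded"] > CHECK_INTERVAL:
#       pass  # re-download the feed here, then persist it with save_cache(cache, url)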
def save_cache(self, cache, url):
    with our_db("cache") as db:
        #db[url.encode("utf-8")] = cache
        db[url] = cache
def delete_if_missing(self):
    output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
    if not os.path.exists(output_dir):
        with our_db("planets") as db:
            del db[self.direc]
        log.info("Deleted missing planet: %s" % self.direc)
def generate(self):
    output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
    if not os.path.exists(output_dir):
        log.info("Can't find %s directory. Skipping generate." % output_dir)
        return
    print "Generating %s" % output_dir

    lopt = {
        "owner_name": self.user,
        "owner_email": self.email,
        "title": self.name,
        "feed_url": "%s%s/atom.xml" % (cfg.BASE_HREF, self.direc),
        "opml_url": "%s%s/opml.xml" % (cfg.BASE_HREF, self.direc),
        "feed_page": "%s%s/" % (cfg.BASE_HREF, self.direc),
        "updated": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.last_downloaded)),
        "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
        "datemodified": time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(self.last_downloaded)),
    }

    ## Get the entries and sort them
    entries = {}
    lopt["Feeds"] = []
    for url, f in self.feeds.items():
        with our_db("cache") as db:
            if not url in db:
                continue
            try:
                #cache = db[url.encode("utf-8")]
                cache = db[url]
            except json.decoder.JSONDecodeError, e:
                log.debug("Json error on generating url %s: %s" % (url, e))
                continue
        parsed = cache["data"]
        if not parsed or not parsed["entries"]:
            log.debug("No data for %s. Skipping." % url)
            continue

        for e in parsed["entries"]:
            e["name"] = f["name"]
            e["links"] = parsed["feed"]["links"]
            e["feed_name"] = smart_str(parsed["feed"]["title"], encoding="ascii", errors="ignore")
            e["channel_title_plain"] = e["feed_name"]
            e["channel_image"] = f["image"]
            e["channel_name"] = e["feed_name"]
            if "subtitle" in parsed["feed"]:
                e["subtitle"] = parsed["feed"]["subtitle"]
            else:
                e["subtitle"] = ""
            if parsed["feed"]["link"].endswith("/"):
                e["channel_link"] = e["feed_id"] = parsed["feed"]["link"]
            else:
                e["channel_link"] = e["feed_id"] = parsed["feed"]["link"] + "/"

            if "updated" in e:
                e["date"] = dateutil.parser.parse(e["updated"]).strftime("%Y-%m-%d %H:%M:%S")
                e["updated"] = dateutil.parser.parse(e["updated"]).isoformat()
            elif "published_parsed" in e:
                e["date"] = dateutil.parser.parse(e["published_parsed"]["__value__"]).strftime("%Y-%m-%d %H:%M:%S")
                e["updated"] = dateutil.parser.parse(e["published_parsed"]["__value__"]).isoformat()
            else:
                e["date"] = e["updated"] = "1970-01-01T00:00:00Z"
                # We really should assume the blog post is from when it is
                # first seen, for lack of a better option.
                #e['date'] = e['updated'] = datetime.now().strftime("%Y-%m-%dT%H:00Z")
                log.debug("No updated or date field in entry for %s" % url)

            #pretty_print_dict(e)
            if not "id" in e:
                e["id"] = e["link"]
            if not "link" in e:
                e["link"] = e["id"]
            if not e["id"] and not e["link"]:
                log.debug("%s has neither id nor link" % e["feed_name"])
            entries[e["id"]] = e

        ## OPML template stuff and sidebar stuff
        feed_data = {}
        for l in parsed["feed"]["links"]:
            if not "type" in l:
                l["type"] = "text/html"
            if l["rel"] == "self":
                feed_data["url"] = l["href"]
            elif l["rel"] == "alternate":
                if "href" in l:
                    feed_data["link"] = l["href"]
        feed_data["author"] = f["name"]
        feed_data["title"] = smart_str(parsed["feed"]["title"], encoding="ascii", errors="ignore")
        feed_data["image"] = f["image"]
        if "feedurl" in f:
            feed_data["url"] = f["feedurl"]
        else:
            log.error("%s is missing the feedurl key. Falling back to url" % url)
            feed_data["url"] = f["url"]
        lopt["Feeds"].append(feed_data)
def load(self):
    with our_db('planets') as db:
        for k in db.keys():
            if not self.selected or k in self.selected:
                self.append(Planet(db[k]))
#!/usr/bin/python
# convert from shelf to sqlite

import sys
from util import our_db, sqlite_db
import simplejson as json

with sqlite_db('planets') as sdb:
    sdb.clear()
    with our_db('planets') as odb:
        for key, val in odb.items():
            val = json.loads(val)
            sdb[key] = val

# NOTE: the script exits here, so the cache conversion below never runs.
sys.exit()

with sqlite_db('cache') as sdb:
    sdb.clear()
    with our_db('cache') as odb:
        for key, val in odb.items():
            val = json.loads(val)
            sdb[key] = val
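# A hedged spot-check of the migration, reusing the same dict-style operations
# the script relies on (items() on the shelf side, item access on the sqlite
# side).  It additionally assumes sqlite_db supports dict-style lookup, which
# the script above does not itself demonstrate:
#
#   with our_db('planets') as odb:
#       with sqlite_db('planets') as sdb:
#           for key, val in odb.items():
#               assert sdb[key] == json.loads(val)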
def generate(self):
    output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
    if not os.path.exists(output_dir):
        log.info("Can't find %s directory. Skipping generate." % output_dir)
        return
    print "Generating %s" % output_dir

    lopt = {'owner_name': self.user,
            'owner_email': self.email,
            'title': self.name,
            'feed_url': "%s%s/atom.xml" % (cfg.BASE_HREF, self.direc),
            'opml_url': "%s%s/opml.xml" % (cfg.BASE_HREF, self.direc),
            'feed_page': "%s%s/" % (cfg.BASE_HREF, self.direc),
            'updated': time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.last_downloaded)),
            'date': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
            'datemodified': time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(self.last_downloaded)),
            }

    ## Get the entries and sort them
    entries = {}
    lopt['Feeds'] = []
    for url, f in self.feeds.items():
        with our_db('cache') as db:
            if not url in db:
                continue
            try:
                cache = db[url]
            except json.decoder.JSONDecodeError, e:
                log.debug("Json error on generating url %s: %s" % (url, e))
                continue
        parsed = cache['data']
        if not parsed or not parsed['entries']:
            log.debug("No data for %s. Skipping." % url)
            continue

        for e in parsed['entries']:
            e['name'] = f['name']
            if 'links' in parsed['feed']:
                e['links'] = parsed['feed']['links']
            else:
                e['links'] = []
            if 'title' in parsed['feed']:
                e['feed_name'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
            else:
                e['feed_name'] = f['name']
            e['channel_title_plain'] = e['feed_name']
            e['channel_image'] = f['image']
            e['channel_name'] = e['feed_name']
            if 'subtitle' in parsed['feed']:
                e['subtitle'] = parsed['feed']['subtitle']
            else:
                e['subtitle'] = ''
            if 'link' in parsed['feed']:
                if parsed['feed']['link'].endswith('/'):
                    e['channel_link'] = e['feed_id'] = parsed['feed']['link']
                else:
                    e['channel_link'] = e['feed_id'] = parsed['feed']['link'] + '/'
            else:
                e['channel_link'] = e['feed_id'] = f['feedurl']

            if 'updated' in e:
                e['date'] = dateutil.parser.parse(e['updated']).strftime("%Y-%m-%d %H:%M:%S")
                e['updated'] = dateutil.parser.parse(e['updated']).isoformat()
            elif 'published_parsed' in e:
                if e['published_parsed'] is None:
                    log.debug('%s has published date that could not be parsed' % e['feed_name'])
                else:
                    if len(e['published_parsed']) == 9:
                        e['date'] = time.strftime("%Y-%m-%d %H:%M:%S", e['published_parsed'])
                        e['updated'] = datetime.date.fromtimestamp(time.mktime(e['published_parsed'])).isoformat()
                    else:
                        e['date'] = dateutil.parser.parse(e['published_parsed']['__value__']).strftime("%Y-%m-%d %H:%M:%S")
                        e['updated'] = dateutil.parser.parse(e['published_parsed']['__value__']).isoformat()
            else:
                e['date'] = e['updated'] = '1970-01-01T00:00:00Z'
                # We really should assume the blog post is from when it is
                # first seen, for lack of a better option.
                #e['date'] = e['updated'] = datetime.now().strftime("%Y-%m-%dT%H:00Z")
                log.debug("No updated or date field in entry for %s" % url)

            #pretty_print_dict(e)
            if not 'id' in e:
                e['id'] = e['link']
            if not 'link' in e:
                e['link'] = e['id']
            if not e['id'] and not e['link']:
                log.debug('%s has neither id nor link' % e['feed_name'])
            entries[e['id']] = e

        ## OPML template stuff and sidebar stuff
        feed_data = {}
        # Default these to the feed itself
        if 'feedurl' in f:
            feed_data['url'] = f['feedurl']
            feed_data['link'] = f['feedurl']
        for l in e['links']:
            if not 'type' in l:
                l['type'] = 'text/html'
            if l['rel'] == "self":
                feed_data['url'] = l['href']
            elif l['rel'] == "alternate":
                if 'href' in l:
                    feed_data['link'] = l['href']
        feed_data['author'] = f['name']
        if 'title' in parsed['feed']:
            feed_data['title'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
        else:
            feed_data['title'] = f['name']
        feed_data['image'] = f['image']
        if 'feedurl' in f:
            feed_data['url'] = f['feedurl']
        else:
            log.error("%s is missing the feedurl key. Falling back to url" % url)
            feed_data['url'] = f['url']
        lopt['Feeds'].append(feed_data)
def generate(self):
    output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
    if not os.path.exists(output_dir):
        log.info("Can't find %s directory. Skipping generate." % output_dir)
        return
    print "Generating %s" % output_dir

    lopt = {'owner_name': self.user,
            'title': self.name,
            'feed_url': "%s%s/atom.xml" % (cfg.BASE_HREF, self.direc),
            'opml_url': "%s%s/opml.xml" % (cfg.BASE_HREF, self.direc),
            'feed_page': "%s%s/" % (cfg.BASE_HREF, self.direc),
            'updated': time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.last_downloaded)),
            'date': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
            'datemodified': time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(self.last_downloaded)),
            }

    ## Get the entries and sort them
    entries = {}
    lopt['Feeds'] = []
    for url, f in self.feeds.items():
        with our_db('cache') as db:
            if not url in db:
                continue
            try:
                #cache = db[url.encode("utf-8")]
                cache = db[url]
            except json.decoder.JSONDecodeError, e:
                log.debug("Json error on generating url %s: %s" % (url, e))
                continue
        parsed = cache['data']
        if not parsed or not parsed['entries']:
            log.debug("No data for %s. Skipping." % url)
            continue

        for e in parsed['entries']:
            e['name'] = f['name']
            e['links'] = parsed['feed']['links']
            e['feed_name'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
            e['channel_title_plain'] = e['feed_name']
            e['channel_image'] = f['image']
            e['channel_name'] = e['feed_name']
            if 'subtitle' in parsed['feed']:
                e['subtitle'] = parsed['feed']['subtitle']
            else:
                e['subtitle'] = ''
            if parsed['feed']['link'].endswith('/'):
                e['channel_link'] = e['feed_id'] = parsed['feed']['link']
            else:
                e['channel_link'] = e['feed_id'] = parsed['feed']['link'] + '/'
            try:
                e['date'] = dateutil.parser.parse(e['updated']).strftime("%Y-%m-%d %H:%M:%S")
                e['updated'] = dateutil.parser.parse(e['updated']).isoformat()
            except KeyError:
                e['date'] = e['updated'] = '1970-01-01T00:00:00Z'
                log.debug("No updated field in entry for %s" % url)
            if not 'id' in e:
                e['id'] = e['link']
            if not 'link' in e:
                e['link'] = e['id']
            if not e['id'] and not e['link']:
                log.debug('%s has neither id nor link' % e['feed_name'])
            entries[e['id']] = e

        ## OPML template stuff and sidebar stuff
        feed_data = {}
        for l in parsed['feed']['links']:
            if not 'type' in l:
                l['type'] = 'text/html'
            if l['rel'] == "self":
                feed_data['url'] = l['href']
            elif l['rel'] == "alternate":
                if 'href' in l:
                    feed_data['link'] = l['href']
        feed_data['author'] = f['name']
        feed_data['title'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
        feed_data['image'] = f['image']
        if 'feedurl' in f:
            feed_data['url'] = f['feedurl']
        else:
            log.error("%s is missing the feedurl key. Falling back to url" % url)
            feed_data['url'] = f['url']
        lopt['Feeds'].append(feed_data)