Example No. 1
def make_planet(subdir, output_dir=None,
                name="", user="", email=""):
   """
   Makes a planet on disk and in the db, copying the skeleton
   directory on disk.  Does not seed the planet with default values
   for owner or email.
   """
   if not validate_input(subdir):
      raise BadSubdirNameError, subdir

   if not output_dir:
      output_dir = opt['output_dir']

   path = os.path.join(output_dir, subdir)
   
   with our_db('planets') as db:
      if os.path.exists(path) and not subdir in db:
         log.debug("Exists on disk but not in db, attempting to delete")
         shutil.rmtree(path)

   try:
      shutil.copytree(opt['new_planet_dir'], path, symlinks=True)
   except OSError, errstr:
      if os.path.exists(path):
         msg = "%s planet already exists. Please choose another subdirectory name." % subdir
         err.add(msg)
         log.info(msg)
         return False
      err.add("Couldn't create planet: %s" % errstr)
      return False
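
Every example on this page relies on the our_db context manager imported from util. As a rough orientation, here is a minimal sketch of such a context manager, assuming a shelve-backed key/value store keyed by database name; the real util.our_db implementation may differ.

# Hypothetical sketch of our_db (an assumption, not the project's actual code):
# a context manager that yields a dict-like, shelve-backed store named after
# the database ("planets", "cache", ...) and closes it on exit.
import contextlib
import shelve

@contextlib.contextmanager
def our_db(name):
    db = shelve.open("%s.db" % name, writeback=True)
    try:
        yield db
    finally:
        db.close()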
Example No. 2
    def dump_cache(self):
        for url in self.feeds:
            print url
            with our_db("cache") as db:
                cache = db[url]

            print json.dumps(cache)
Example No. 3
 def not_in_db_test(s):
     name = "delete_test"
     p = make_temp_planet(name, True)
     with our_db('planets') as db:
         del db[name]
     p.delete()
     p = Planet(direc=name)
Example No. 4
   def delete_unused_feeds(self):
      planets = []
      with our_db('planets') as db:
         for k in db.keys():
            planets.append(Planet(db[k]))

      feed_urls = {}
      for p in planets:
         for f in p.feeds:
            feed_urls[f] = f

      feed_urls = feed_urls.keys()
      with our_db('cache') as db:
         for k in db.keys():
            if not k in feed_urls:
               del db[k]
               log.debug("Removed %s from cache." % k)
Example No. 5
 def delete(self):
    with our_db('planets') as db:
       del db[self.direc]
    try:
       shutil.rmtree(os.path.join(cfg.OUTPUT_DIR, self.direc))
    except OSError:
       pass
    log.info("Deleted planet: %s" % self.direc)
Example No. 6
 def __init__(self, *args, **kwargs):
     if "direc" in kwargs:
         with our_db("planets") as db:
             self.load_dict(db[kwargs["direc"]])
     elif isinstance(args[0], basestring):
         self.load_json(args[0])
     elif isinstance(args[0], dict):
         self.load_dict(args[0])
     else:
         self.load_dict(kwargs)
Example No. 7
    def save(self, update_config_timestamp=False, ignore_missing_dir=False):
        output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
        if not ignore_missing_dir and not os.path.exists(output_dir):
            log.info("Can't find %s directory.  Skipping save." % output_dir)
            return

        log.debug("Saving the planet! %s" % self.direc)
        if update_config_timestamp:
            self.last_config_change = time.time()
        with our_db("planets") as db:
            # db[self.direc.encode("utf-8")] = self.serializable()
            db[self.direc] = self.serializable()
Example No. 8
   def update_feed(self, url):
      """Download feed if it's out of date"""

      force_check = opt['force_check']
      with our_db('cache') as db:
         try:
            cache = db[url]
         except KeyError:
            log.info("Can't find %s in cache.  Making default." % url)
            cache = {'data':'', 'last_downloaded':0, 'dload_fail':False}
            force_check = True
         except json.decoder.JSONDecodeError, e:
            log.debug("Json error on updating url %s: %s" % (url, e))
            cache = {'data':'', 'last_downloaded':0, 'dload_fail':False}
            force_check = True
Example No. 9
    def update_feed(self, url):
        """Download feed if it's out of date"""

        force_check = opt["force_check"]
        with our_db("cache") as db:
            try:
                # cache = db[url.encode("utf-8")]
                cache = db[url]
            except KeyError:
                log.info("Can't find %s in cache.  Making default." % url)
                cache = {"data": "", "last_downloaded": 0, "dload_fail": False}
                force_check = True
            except json.decoder.JSONDecodeError, e:
                log.debug("Json error on updating url %s: %s" % (url, e))
                cache = {"data": "", "last_downloaded": 0, "dload_fail": False}
                force_check = True
Example No. 10
 def save_cache(self, cache, url):
     with our_db("cache") as db:
         # db[url.encode("utf-8")] = cache
         db[url] = cache
Example No. 11
 def delete_if_missing(self):
     output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
     if not os.path.exists(output_dir):
         with our_db("planets") as db:
             del db[self.direc]
         log.info("Deleted missing planet: %s" % self.direc)
Example No. 12
    def generate(self):
        output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
        if not os.path.exists(output_dir):
            log.info("Can't find %s directory.  Skipping generate." % output_dir)
            return
        print "Generating %s" % output_dir
        lopt = {
            "owner_name": self.user,
            "owner_email": self.email,
            "title": self.name,
            "feed_url": "%s%s/atom.xml" % (cfg.BASE_HREF, self.direc),
            "opml_url": "%s%s/opml.xml" % (cfg.BASE_HREF, self.direc),
            "feed_page": "%s%s/" % (cfg.BASE_HREF, self.direc),
            "updated": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.last_downloaded)),
            "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
            "datemodified": time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(self.last_downloaded)),
        }

        ## Get the entries and sort them
        entries = {}
        lopt["Feeds"] = []
        for url, f in self.feeds.items():
            with our_db("cache") as db:
                if not url in db:
                    continue
                try:
                    # cache = db[url.encode("utf-8")]
                    cache = db[url]
                except json.decoder.JSONDecodeError, e:
                    log.debug("Json error on generating url %s: %s" % (url, e))
                    continue

            parsed = cache["data"]
            if not parsed or not parsed["entries"]:
                log.debug("No data for %s.  Skipping." % url)
                continue

            for e in parsed["entries"]:
                e["name"] = f["name"]
                e["links"] = parsed["feed"]["links"]
                e["feed_name"] = smart_str(parsed["feed"]["title"], encoding="ascii", errors="ignore")
                e["channel_title_plain"] = e["feed_name"]
                e["channel_image"] = f["image"]
                e["channel_name"] = e["feed_name"]
                if "subtitle" in parsed["feed"]:
                    e["subtitle"] = parsed["feed"]["subtitle"]
                else:
                    e["subtitle"] = ""
                if parsed["feed"]["link"].endswith("/"):
                    e["channel_link"] = e["feed_id"] = parsed["feed"]["link"]
                else:
                    e["channel_link"] = e["feed_id"] = parsed["feed"]["link"] + "/"

                if "updated" in e:
                    e["date"] = dateutil.parser.parse(e["updated"]).strftime("%Y-%m-%d %H:%M:%S")
                    e["updated"] = dateutil.parser.parse(e["updated"]).isoformat()
                elif "published_parsed" in e:
                    e["date"] = dateutil.parser.parse(e["published_parsed"]["__value__"]).strftime("%Y-%m-%d %H:%M:%S")
                    e["updated"] = dateutil.parser.parse(e["published_parsed"]["__value__"]).isoformat()
                else:
                    e["date"] = e["updated"] = "1970-01-01T00:00:00Z"
                    # We really should assume the blog post is from when it is first seen for lack of a better option
                    # e['date'] = e['updated'] = datetime.now().strftime("%Y-%m-%dT%H:00Z")
                    log.debug("No updated or date field in entry for %s" % url)
                    # pretty_print_dict(e)
                if not "id" in e:
                    e["id"] = e["link"]
                if not "link" in e:
                    e["link"] = e["id"]
                if not e["id"] and not e["link"]:
                    log.debug("%s has neither id nor link" % e["feed_name"])
                entries[e["id"]] = e

            ## OPML template stuff and sidebar stuff
            feed_data = {}
            for l in parsed["feed"]["links"]:
                if not "type" in l:
                    l["type"] = "text/html"
                if l["rel"] == "self":
                    feed_data["url"] = l["href"]
                elif l["rel"] == "alternate":
                    if "href" in l:
                        feed_data["link"] = l["href"]
            feed_data["author"] = f["name"]
            feed_data["title"] = smart_str(parsed["feed"]["title"], encoding="ascii", errors="ignore")
            feed_data["image"] = f["image"]
            if "feedurl" in f:
                feed_data["url"] = f["feedurl"]
            else:
                log.error("%s is missing the feedurl key.  Falling back to url" % url)
                feed_data["url"] = f["url"]
            lopt["Feeds"].append(feed_data)
Example No. 13
 def load(self):
    with our_db('planets') as db:
       for k in db.keys():
          if not self.selected or k in self.selected:
             self.append(Planet(db[k]))
Example No. 14
#!/usr/bin/python

# convert from shelf to sqlite

import sys
from util import our_db, sqlite_db
import simplejson as json

with sqlite_db('planets') as sdb:
   sdb.clear()
   with our_db('planets') as odb:
      for key, val in odb.items():
         val = json.loads(val)
         sdb[key]=val
sys.exit()  # NOTE: the script exits here, so the cache conversion below never runs
with sqlite_db('cache') as sdb:
   sdb.clear()
   with our_db('cache') as odb:
      for key, val in odb.items():
         val = json.loads(val)
         sdb[key]=val
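
The script above also relies on a dict-like sqlite_db context manager imported from util. Below is a minimal sketch of such a wrapper, assuming a single key/value table with JSON-encoded values; the real util.sqlite_db may differ.

# Hypothetical sketch of sqlite_db (an assumption, not the project's actual code):
# a context manager yielding a dict-like wrapper over one key/value table.
import contextlib
import sqlite3
import simplejson as json

class _KVStore(object):
    def __init__(self, conn):
        self.conn = conn
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS kv (k TEXT PRIMARY KEY, v TEXT)")

    def clear(self):
        self.conn.execute("DELETE FROM kv")

    def __setitem__(self, key, val):
        self.conn.execute("REPLACE INTO kv (k, v) VALUES (?, ?)",
                          (key, json.dumps(val)))

@contextlib.contextmanager
def sqlite_db(name):
    conn = sqlite3.connect("%s.sqlite" % name)
    try:
        yield _KVStore(conn)
        conn.commit()
    finally:
        conn.close()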

Example No. 15
   def generate(self):
      output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
      if not os.path.exists(output_dir):
         log.info("Can't find %s directory.  Skipping generate." % output_dir)
         return
      print "Generating %s" % output_dir
      lopt = {'owner_name':self.user,
              'owner_email':self.email,
              'title':self.name,
              'feed_url':"%s%s/atom.xml" % (cfg.BASE_HREF, self.direc),
              'opml_url':"%s%s/opml.xml" % (cfg.BASE_HREF, self.direc),
              'feed_page':"%s%s/" % (cfg.BASE_HREF, self.direc),
              'updated':time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(self.last_downloaded)),
              'date':time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()),
              'datemodified':time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(self.last_downloaded)),
              }

      ## Get the entries and sort them
      entries = {}
      lopt['Feeds']=[]
      for url, f in self.feeds.items():
         with our_db('cache') as db:
            if not url in db:
               continue
            try:
               cache = db[url]
            except json.decoder.JSONDecodeError, e:
               log.debug("Json error on generating url %s: %s" % (url, e))
               continue

         parsed = cache['data']
         if not parsed or not parsed['entries']:
            log.debug("No data for %s.  Skipping." % url)
            continue
         
         for e in parsed['entries']:
            e['name'] = f['name']
            if 'links' in parsed['feed']:
               e['links'] = parsed['feed']['links']
            else:
               e['links'] = []
            if 'title' in parsed['feed']:
               e['feed_name'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
            else:
               e['feed_name'] = f['name']
            e['channel_title_plain'] = e['feed_name']
            e['channel_image'] = f['image']
            e['channel_name'] = e['feed_name']
            if 'subtitle' in parsed['feed']:
               e['subtitle'] = parsed['feed']['subtitle']
            else:
               e['subtitle']=''
            if 'link' in parsed['feed']:
               if parsed['feed']['link'].endswith('/'):
                  e['channel_link'] = e['feed_id'] = parsed['feed']['link']
               else:
                  e['channel_link'] = e['feed_id'] = parsed['feed']['link']+'/'
            else:
               e['channel_link'] = e['feed_id'] = f['feedurl']
            if 'updated' in e:
               e['date'] = dateutil.parser.parse(e['updated']).strftime("%Y-%m-%d %H:%M:%S")
               e['updated'] = dateutil.parser.parse(e['updated']).isoformat()
            elif 'published_parsed' in e:
               if e['published_parsed'] is None:
                  log.debug('%s has published date that could not be parsed' % e['feed_name'])
               else:
                  if len(e['published_parsed']) == 9:
                     e['date'] = time.strftime("%Y-%m-%d %H:%M:%S", e['published_parsed'])
                     e['updated'] = datetime.date.fromtimestamp(time.mktime(e['published_parsed'])).isoformat()
                  else:
                     e['date'] = dateutil.parser.parse(e['published_parsed']['__value__']).strftime("%Y-%m-%d %H:%M:%S")
                     e['updated'] = dateutil.parser.parse(e['published_parsed']['__value__']).isoformat()
            else:
               e['date'] = e['updated'] = '1970-01-01T00:00:00Z'
               # We really should assume the blog post is from when it is first seen for lack of a better option
               #e['date'] = e['updated'] = datetime.now().strftime("%Y-%m-%dT%H:00Z")
               log.debug("No updated or date field in entry for %s" % url)
               #pretty_print_dict(e)
            if not 'id' in e: e['id'] = e['link']
            if not 'link' in e: e['link'] = e['id']
            if not e['id'] and not e['link']:
               log.debug('%s has neither id nor link' % e['feed_name'])
            entries[e['id']] = e

         ## OPML template stuff and sidebar stuff
         feed_data = {}

         # Default these to the feed itself
         if 'feedurl' in f:
            feed_data['url'] = f['feedurl']
            feed_data['link'] = f['feedurl']

         for l in e['links']:
            if not 'type' in l:
               l['type']='text/html'
            if l['rel']=="self":
               feed_data['url'] = l['href']
            elif l['rel']=="alternate":
               if 'href' in l:
                  feed_data['link'] = l['href']
         feed_data['author'] = f['name']
         if 'title' in parsed['feed']:
            feed_data['title'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
         else:
            feed_data['title'] = f['name']
         feed_data['image'] = f['image']
         if 'feedurl' in f:
            feed_data['url'] = f['feedurl']
         else:
            log.error("%s is missing the feedurl key.  Falling back to url" % url)
            feed_data['url'] = f['url']
         lopt['Feeds'].append(feed_data)
Example No. 16
   def generate(self):
      output_dir = os.path.join(cfg.OUTPUT_DIR, self.direc)
      if not os.path.exists(output_dir):
         log.info("Can't find %s directory.  Skipping generate." % output_dir)
         return
      print "Generating %s" % output_dir

      lopt = {'owner_name':self.user,
              'title':self.name,
              'feed_url':"%s%s/atom.xml" % (cfg.BASE_HREF, self.direc),
              'opml_url':"%s%s/opml.xml" % (cfg.BASE_HREF, self.direc),
              'feed_page':"%s%s/" % (cfg.BASE_HREF, self.direc),
              'updated':time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(self.last_downloaded)),
              'date':time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()),
              'datemodified':time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(self.last_downloaded)),
              }

      ## Get the entries and sort them
      entries = {}
      lopt['Feeds']=[]
      for url, f in self.feeds.items():
         with our_db('cache') as db:
            if not url in db:
               continue
            try:
               #cache = db[url.encode("utf-8")]
               cache = db[url]
            except json.decoder.JSONDecodeError, e:
               log.debug("Json error on generating url %s: %s" % (url, e))
               continue

         parsed = cache['data']
         if not parsed or not parsed['entries']:
            log.debug("No data for %s.  Skipping." % url)
            continue
         
         for e in parsed['entries']:
            e['name'] = f['name']
            e['links'] = parsed['feed']['links']
            e['feed_name'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
            e['channel_title_plain'] = e['feed_name']
            e['channel_image'] = f['image']
            e['channel_name'] = e['feed_name']
            if 'subtitle' in parsed['feed']:
               e['subtitle'] = parsed['feed']['subtitle']
            else:
               e['subtitle']=''
            if parsed['feed']['link'].endswith('/'):
               e['channel_link'] = e['feed_id'] = parsed['feed']['link']
            else:
               e['channel_link'] = e['feed_id'] = parsed['feed']['link']+'/'

            try:
               e['date'] = dateutil.parser.parse(e['updated']).strftime("%Y-%m-%d %H:%M:%S")
               e['updated'] = dateutil.parser.parse(e['updated']).isoformat()
            except KeyError:
               e['date'] = e['updated'] = '1970-01-01T00:00:00Z'
               log.debug("No updated field in entry for %s" % url)

            if not 'id' in e: e['id'] = e['link']
            if not 'link' in e: e['link'] = e['id']
            if not e['id'] and not e['link']:
               log.debug('%s has neither id nor link' % e['feed_name'])
            entries[e['id']] = e

         ## OPML template stuff and sidebar stuff
         feed_data = {}
         for l in parsed['feed']['links']:
            if not 'type' in l:
               l['type']='text/html'
            if l['rel']=="self":
               feed_data['url'] = l['href']
            elif l['rel']=="alternate":
               if 'href' in l:
                  feed_data['link'] = l['href']
         feed_data['author'] = f['name']
         feed_data['title'] = smart_str(parsed['feed']['title'], encoding='ascii', errors='ignore')
         feed_data['image'] = f['image']
         if 'feedurl' in f:
            feed_data['url'] = f['feedurl']
         else:
            log.error("%s is missing the feedurl key.  Falling back to url" % url)
            feed_data['url'] = f['url']
         lopt['Feeds'].append(feed_data)