Example #1
0
class Junkie(object):
  '''
    Tumblr dashboard reader.

    Fetches the dashboard through the v1 XML API, keeps the parsed posts in
    memory (self.posts, keyed by post id), pulls every asset URL of a post
    through a local cache, and renders the collected posts to one HTML page.
  '''
  agent = 'Junkie https://github.com/bgnori/Junkie'

  def __init__(self):
    '''
      set up the post store, the asset cache, the XSLT renderer and the
      credentials.

      'depot', 'basic.xslt' and 'config' are hard-coded names; 'config' is
      opened relative to the current working directory.
    '''
    self.posts = {}
    self.cache = Cache('depot', 128)
    self.cache.load_index()
    self.renderer = render.XSLTRenderer('basic.xslt')
    with open('config') as f:
      # safe_load instead of load: the config is only merged into the POST
      # form data (see prefetch), so plain YAML mappings/scalars are all it
      # needs. yaml.load without a Loader can instantiate arbitrary Python
      # objects and is rejected outright by PyYAML >= 6.
      self.auth = yaml.safe_load(f.read())

  def save(self):
    '''
      persist the cache: entries first, then the index.
    '''
    self.cache.save_entries()
    self.cache.save_index()

  def get(self, url):
    '''
      returns the CacheEntry for url.
      the cache is consulted first; on a miss (falsy result) the content is
      requested from the web.
    '''
    ce = self._cache(url)
    if not ce:
      ce = self._web(url)
    return ce

  def _cache(self, url):
    '''
      retrieve content from cache.
      returns CacheEntry object
    '''
    return self.cache.get(url)

  def _web(self, url):
    '''
      retrieve content from web.
      returns CacheEntry object

      the entry is returned immediately; it is written to (or aborted)
      later, from the callbacks attached to the pending request.
    '''
    entry = self.cache.make_entry(url)

    d = client.getPage(url)
    def onPageArrival(data):
      f = DataFile(data, 'OK') #FIXME Don't guess, use header
      entry.write(f)
      return f
    d.addCallback(onPageArrival)
    def onFail(f):#FIXME
      # Single-argument print(): same output as the old print statement on
      # Python 2, and still valid syntax on Python 3.
      print('onFail %s' % (f,))
      entry.abort()
      return 'fail' #FIXME
    d.addErrback(onFail)
    return entry

  def prefetch(self, start=0, num=50):
    '''
      request a page of the dashboard and feed it to update_posts.

      start: offset of the first post to request (default 0).
      num: number of posts to request (default 50).

      the credentials loaded from 'config' are sent along as form fields.
      returns None; the response is handled asynchronously. every successful
      response is also written to dashboard/<unix time>.xml.
    '''
    data_dict = {'start': start, 'num': num}
    data_dict.update(self.auth)
    postdata = urllib.urlencode(data_dict)
    d = client.getPage('http://www.tumblr.com/api/dashboard',
                            method='POST',
                            agent=self.agent,
                            headers = {'Content-Type': 'application/x-www-form-urlencoded'},
                            postdata=postdata)
    def onDataArrival(data):
      if data.startswith('''<!DOCTYPE html PUBLIC "-'''):
        # An HTML document instead of the XML payload is treated as a
        # failed login.
        print('ugh! login failed.')
        return None
      with open('dashboard/%s.xml'%(time.time(),), 'w') as f:
        f.write(data)
      self.update_posts(data)
    d.addCallback(onDataArrival).addErrback(printError)


  def update_posts(self, xmldata):
    '''
      parse v1 XML data and store posts.

      every asset URL of a post is requested through get() so that it ends
      up in the cache; the post itself is stored in self.posts under its id,
      replacing any earlier post with the same id.
    '''
    t = etree.XML(xmldata)
    find = etree.XPath('/tumblr/posts/post')
    for post in find(t):
      p = PostFactory(post)
      for url in p.assets_urls():
        self.get(url)
      self.posts[p.id] = p

  def _unique_by_root(self):
    '''
      returns the stored posts, one per reblog root, oldest first.

      when several posts share a reblogged_root_url only the one with the
      smallest unix_timestamp is kept.
    '''
    uniques = {}
    for p in self.posts.values():
      # A post with no root URL (presumably an original, non-reblogged post
      # -- verify against PostFactory) gets a key of its own. Keying on the
      # bare falsy value would collapse all such posts into a single one.
      key = p.reblogged_root_url or ('post', p.id)
      q = uniques.get(key, None)
      if q is None or q.unix_timestamp > p.unix_timestamp:
        uniques[key] = p
    return sorted(uniques.values(), key=lambda x:x.unix_timestamp)

  def make_dashboard(self):
    '''
      returns html

      the posts are de-duplicated by reblog root, ordered oldest first,
      rendered by the XSLT renderer and wrapped in a minimal HTML document.
    '''
    posts = self._unique_by_root()

    post_div = self.renderer.render(posts)

    html = E.HTML(
      E.HEAD(
        E.META(charset="UTF-8"),
        E.META({"http-equiv":"Content-Type", "content":"text/html;charset=utf-8"}),
        E.TITLE("Mushboard"),
      ),
      E.BODY(
        post_div,
      )
    )
    return etree.tostring(html)