def pages(thread):
    """Yield the URLs of a forum thread's pages that still need crawling.

    Generator with side effects, executed in this order:
      1. Scrapes the thread's top pagination bar for page links.
      2. Creates the thread's Centipede entity on first sighting (and
         fetches/stores a QR-code PNG for it), or computes the not-yet-
         crawled page set for a known thread.
      3. Yields each URL to be crawled.
      4. After the caller has consumed every URL, persists the updated
         Centipede, regenerates the thread's cached StaticContent HTML,
         and deletes its memcache entry so the next request is fresh.

    thread -- dict with at least 'url', 'author', 'title', 'comments'
              and 'views' keys (presumably produced by an upstream
              thread-listing scraper; schema not visible here -- confirm
              against the caller).
    """
    centipede = Centipede.get_by_key_name(thread['url'])
    d = {}
    urls = []
    logging.info(thread['url'])
    # netloc + path (scheme and query stripped) doubles as the key name
    # for the thread's StaticContent entities below.
    centipede_url_components = urlparse.urlparse(thread['url'])
    centipede_url_netloc_path = centipede_url_components.netloc + centipede_url_components.path
    # Scrape the hrefs out of the top pagination div; the last entry is
    # dropped ([:-1]) -- presumably a "next"/tail link, verify against the
    # live page markup. The dict is used purely to de-duplicate links.
    for page in scrapemark.scrape(""" <div class="pages" id="pageDivTop"> {* <a href="{{ [pages] }}"></a> *} <span></span> </div> """, url=thread['url'])['pages'][:-1]:
        d[page] = 1
    if centipede is None:
        # First sighting: create the entity seeded from the scraped
        # thread metadata. u'\u5929\u6daf\u7ecf\u6d4e' is the board name
        # ("Tianya Economy").
        centipede = Centipede(key_name=thread['url'],
                              species=db.Category(u'天涯经济'),
                              author=thread['author'],
                              title=thread['title'],
                              comments=thread['comments'],
                              views=thread['views'],
                              pedes=[])
        # Crawl list: the thread URL itself plus all but the last two
        # scraped page links.
        # NOTE(review): d.keys()[:-2] (and d.keys()[-2] below) rely on
        # Python 2 dict key order, which is arbitrary -- confirm which
        # two pages this is meant to exclude.
        urls = [db.Link(thread['url'])]
        urls.extend([db.Link(key) for key in d.keys()[:-2]])
        # Key for the QR image: netloc + path with its extension swapped
        # for 'png' (splits on the FIRST dot of the path -- presumably
        # paths contain a single dot; verify).
        qr_key = centipede_url_components.netloc + '.'.join([centipede_url_components.path.split('.')[0], 'png'])
        # Fetch a 200x200 QR code from the Google Chart API that encodes
        # this site's mirror URL of the thread, and store it as a PNG
        # StaticContent entity.
        img = urlfetch.fetch('http://chart.apis.google.com/chart?cht=qr&chs=200x200&chl=' + urllib2.quote(host_url + centipede_url_netloc_path))
        qr_content = StaticContent(key_name=qr_key, body=img.content, content_type='image/png')
        qr_content.put()
    else:
        # Known thread: resume from the stored 'next' pointer, then add
        # only scraped pages we have not already recorded in 'pedes'.
        urls = [db.Link(centipede.next)]
        urls.extend([db.Link(url) for url in d.keys()[:-2] if url not in centipede.pedes])
    logging.info(urls)
    centipede.pedes.extend(urls)
    # Remember the second-to-last scraped page as the next resume point.
    # NOTE(review): raises IndexError when fewer than two pages were
    # scraped -- confirm single-page threads cannot reach here.
    centipede.next = db.Link(d.keys()[-2])
    for url in urls:
        yield url
    # Everything below runs only after the consumer has exhausted the
    # generator, i.e. after all yielded URLs were processed.
    centipede.put()
    content = StaticContent.get_by_key_name(centipede_url_netloc_path)
    stanzas = [stanza for stanza in new_stanzas(thread, centipede)]
    if content is None:
        # First render: build the cached HTML from the base template.
        content = StaticContent(key_name=centipede_url_netloc_path,
                                template=db.Text(template('centipede.html', centipede=centipede, stanzas=stanzas, template_next=True)),
                                content_type='text/html')
    else:
        # Subsequent renders re-expand the previously stored template text
        # rather than the base file -- presumably it still contains
        # placeholders for appending new stanzas; confirm in template().
        content.template = db.Text(template(content.template, centipede=centipede, stanzas=stanzas, template_next=True))
    # db.put(stanzas)
    content.put()
    # Invalidate the cached copy so the next request serves the new HTML.
    memcache.delete(content.key().name())