def post(self, path):
    """Store the submitted page content under ``path`` and render it back.

    Reads the ``content`` request parameter, saves it as a new
    ``PageContent`` entity titled with ``path``, then renders
    ``wikipage.html`` with the stored content.
    """
    submitted = self.request.get('content')

    entity = PageContent(title=path, content=submitted)
    entity.put()

    # path[1:] drops the first character of the path before handing it to
    # the template (presumably the leading "/" -- confirm against the route).
    self.render(
        'wikipage.html',
        logged_in=True,
        path=path[1:],
        content=entity.content,
    )
def defer_fetch(url, site_id, is_index=False):
    """Fetch ``url`` for ``site_id``; fan out an index page or store an article.

    When ``is_index`` is true, the page is fetched, converted from the
    site's configured encoding to UTF-8, and every URL returned by
    ``get_news_urls`` is enqueued on ``/start_fetch``.

    Otherwise the URL is treated as a single article: it is skipped if
    ``is_exsiting(url)`` is true, its title/content are obtained (directly
    via ``parse_page`` for the sites listed below, through the readability
    parser API for all others), and the result is saved as ``PageContent``.

    Args:
        url: Address of the page to fetch.
        site_id: Key into ``fetch_config`` selecting the site settings.
        is_index: True when ``url`` is an index page listing article links.

    Returns:
        None.

    Raises:
        KeyError: If ``site_id`` is not present in ``fetch_config``.
    """
    logging.info('fetching...%s' % url)
    site_config = fetch_config[site_id]

    if is_index:
        result = urlfetch.fetch(url)
        # Re-encode from the site's own encoding to UTF-8 before extracting links.
        index_html = result.content.decode(site_config["encoding"]).encode('utf-8')
        for _url in get_news_urls(site_id, index_html):
            taskqueue.add(url='/start_fetch',
                          params={'url': _url, 'site_id': site_id})
        return

    if is_exsiting(url):
        return

    # Bound up front so the error handler below can always log it; it stays
    # None for sites that are parsed locally and never call readability.
    payload = None

    # contents includes: title, content
    if site_id in ('jwc',):
        result = urlfetch.fetch(url)
        contents = parse_page(result.content)
    else:
        # Example of a readability parser API request:
        # http://www.readability.com/api/content/v1/parser?token=16208e14fab764c70989011f1f26fc8c71b85451&url=http://news.scu.edu.cn/news2012/cdzx/webinfo/2013/03/1343288895583976.htm
        # The url is encoded first so that urlencode below does not raise
        # when it contains Chinese characters; `url` is unicode by default.
        # NOTE(review): the API token is hard-coded here; consider moving it
        # to configuration.
        payload = urllib.urlencode({
            "url": url.encode(site_config['encoding']),
            "token": "16208e14fab764c70989011f1f26fc8c71b85451",
        })
        result = urlfetch.fetch(
            "http://www.readability.com/api/content/v1/parser",
            payload=payload,
            method=urlfetch.POST,
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
        )
        contents = json.loads(result.content)

    try:
        p = PageContent(url=url,
                        site_id=site_id,
                        title=contents['title'],
                        content=unescape(contents['content']))
        p.put()
    except KeyError as e:
        # The parser response lacked 'title' or 'content' (e.g. readability
        # failed to parse the page); log and give up on this URL.
        logging.error("Error: %s" % e)
        logging.error("url: %s" % url)
        logging.error("payload: %s" % payload)