def post(self, path):
    """Store the submitted content as a new page and render it.

    Reads the ``content`` request parameter, saves a ``PageContent`` whose
    title is ``path``, then renders ``wikipage.html`` with the saved content.

    Args:
        path: page path taken from the URL; its first character is dropped
            before being handed to the template.
    """
    new_page = PageContent(title=path, content=self.request.get('content'))
    new_page.put()

    self.render(
        'wikipage.html',
        logged_in=True,
        path=path[1:],
        content=new_page.content,
    )
def get(self, site_id=None, count=None):
    """Write the newest pages of a site as JSON, or render the API help page.

    The help page (``api_page.html``) is rendered when ``site_id`` or
    ``count`` is missing, when ``count`` is not an integer, or when it is
    greater than 10. An error message is attached only in the last case.

    Args:
        site_id: value compared against ``PageContent.site_id``.
        count: maximum number of pages to return; anything ``int()`` accepts.
    """
    # Parse ``count`` exactly once; a value that is not an integer is treated
    # like a missing one instead of raising ValueError.
    limit = None
    if count:
        try:
            limit = int(count)
        except (TypeError, ValueError):
            limit = None

    over_limit = limit is not None and limit > 10
    if not site_id or limit is None or over_limit:
        template_attrs = {
            "fetch_config": fetch_config,
            "title": "API",
            "error_msg": "*count* should smaller than or equal to 10." if over_limit else None,
        }
        api_template = jinja_environment.get_template('api_page.html')
        self.response.write(api_template.render(template_attrs))
        return

    newest_first = PageContent.query(PageContent.site_id == site_id) \
        .order(-PageContent.create_at)
    pages = newest_first.fetch(limit)

    body = {
        # A real list (not a lazy ``map`` object) so json.dumps can encode it.
        "pages": [page.to_json() for page in pages],
        "count": limit,
        "site_id": site_id,
    }
    self.response.write(json.dumps(body, ensure_ascii=False))
def post(self, path):
    """Save the submitted content under the page titled ``path`` and redirect.

    Looks up the ``PageContent`` whose title equals ``path`` and overwrites
    its content with the ``content`` request parameter. When no such page is
    stored yet, a new one is created rather than failing on ``None``.

    Args:
        path: page path taken from the URL; used as the page title.
    """
    content = self.request.get('content')

    page = PageContent.all().filter("title =", path).get()
    if page is None:
        # First save for this title: nothing to update, so create the page.
        page = PageContent(title=path, content=content)
    else:
        page.content = content

    page.put()
    self.redirect(page.title)
def defer_fetch(url, site_id, is_index=False):
    """Fetch ``url`` and either enqueue its article links or store the page.

    For an index page, the fetched body is re-encoded to UTF-8, passed to
    ``get_news_urls`` and one ``/start_fetch`` task is queued per returned URL.
    For an article page, nothing happens if ``is_exsiting(url)`` is truthy;
    otherwise the page is parsed (locally for site ``'jwc'``, through the
    readability parser API for every other site) and saved as ``PageContent``.

    Args:
        url: address of the page to fetch.
        site_id: key into ``fetch_config`` selecting the site settings.
        is_index: True when ``url`` is a listing page rather than an article.
    """
    logging.info('fetching...%s' % url)
    site_config = fetch_config[site_id]

    if is_index:
        result = urlfetch.fetch(url)
        index_html = result.content.decode(site_config["encoding"]).encode('utf-8')
        for _url in get_news_urls(site_id, index_html):
            taskqueue.add(url='/start_fetch',
                          params={'url': _url, 'site_id': site_id})
        return

    if is_exsiting(url):
        return

    # Only the readability path builds a payload; keep the name defined so the
    # error handler below can log it on the 'jwc' path as well.
    payload = None

    # contents includes: title, content
    if site_id in ('jwc',):
        result = urlfetch.fetch(url)
        contents = parse_page(result.content)
    else:
        # Example of a readability parser API request:
        # http://www.readability.com/api/content/v1/parser?token=16208e14fab764c70989011f1f26fc8c71b85451&url=http://news.scu.edu.cn/news2012/cdzx/webinfo/2013/03/1343288895583976.htm
        # The url is encoded first so that urlencode below does not raise when
        # it contains Chinese characters; the url variable is unicode by default.
        # NOTE(review): the API token is hard-coded in source; consider moving
        # it to configuration.
        payload = {"url": url.encode(site_config['encoding']),
                   "token": "16208e14fab764c70989011f1f26fc8c71b85451"}
        payload = urllib.urlencode(payload)
        result = urlfetch.fetch(
            "http://www.readability.com/api/content/v1/parser",
            payload=payload,
            method=urlfetch.POST,
            headers={'Content-Type': 'application/x-www-form-urlencoded'}
        )
        contents = json.loads(result.content)

    try:
        p = PageContent(url=url,
                        site_id=site_id,
                        title=contents['title'],
                        content=unescape(contents['content']))
        p.put()
    except KeyError as e:
        # The parsed result lacks an expected key, e.g. readability failed to
        # parse the page.
        logging.error("Error: %s" % e)
        logging.error("url: %s" % url)
        logging.error("payload: %s" % payload)
def get(self, path):
    """Render the edit view for the page titled ``path``.

    Requests without a page title (``path == '/'``) and requests without a
    ``user_id`` cookie are redirected to ``/`` and handled no further.
    Otherwise ``wikipage.html`` is rendered in edit mode, pre-filled with the
    stored content when a page with that title exists.

    Args:
        path: page path taken from the URL; its first character is dropped
            before being handed to the template.
    """
    # Redirect back to home if no page title is in the URL, and stop here so
    # nothing is rendered into the redirect response.
    if path == '/':
        self.redirect('/')
        return

    user_id = self.request.cookies.get('user_id')
    if not user_id:
        self.redirect('/')
        return

    # Check if the page is in the datastore.
    page = PageContent.all().filter("title =", path).get()

    template_args = {
        'path': path[1:],
        'logged_in': True,
        'edit': True,
        'display_edit': "none",
    }
    if page:
        # ``content`` is only passed when there is something to pre-fill.
        template_args['content'] = page.content
    self.render('wikipage.html', **template_args)
def get(self, path):
    """Show the wiki page titled ``path`` to a visitor with a ``user_id`` cookie.

    Visitors without the cookie are redirected to ``/``. Otherwise the page
    is taken from memcache or, failing that, from the datastore. A page that
    is found is added to memcache and rendered; when none is found the
    template is rendered in edit mode instead.

    Args:
        path: page path taken from the URL; its first character is dropped
            before being handed to the template.
    """
    if not self.request.cookies.get('user_id'):
        self.redirect('/')
        return

    # NOTE(review): the cache is read under ``path[1:]`` but written under
    # ``page.title`` below -- confirm the two keys are meant to match.
    page = memcache.get(path[1:])
    if not page:
        page = PageContent.all().filter("title =", path).get()

    if not page:
        self.render('wikipage.html',
                    path=path[1:],
                    logged_in=True,
                    edit=True,
                    display_edit="none")
        return

    memcache.add(page.title, page)
    self.render('wikipage.html',
                logged_in=True,
                path=path[1:],
                content=page.content)
def is_exsiting(url):
    """Look up a stored page by its URL.

    Args:
        url: address compared against ``PageContent.url``.

    Returns:
        Whatever ``get()`` yields for the query on ``PageContent.url == url``;
        callers use the result as a truth value.
    """
    same_url = PageContent.query(PageContent.url == url)
    return same_url.get()