Esempio n. 1
0
 def post(self, path):
     """Save the posted 'content' field as a PageContent and render the page.

     A PageContent titled ``path`` is built from the request's 'content'
     value and ``put()`` is called on it; 'wikipage.html' is then rendered
     with that content.
     """
     submitted = self.request.get('content')

     entity = PageContent(title=path, content=submitted)
     entity.put()

     # The template receives the path without its first character.
     self.render(
         'wikipage.html',
         logged_in=True,
         path=path[1:],
         content=entity.content
     )
Esempio n. 2
0
def defer_fetch(url, site_id, is_index=False):
    """Fetch ``url`` for ``site_id``; fan out an index page or store an article.

    When ``is_index`` is true, ``url`` is fetched, its body is decoded with
    the site's configured encoding, and one ``/start_fetch`` task is enqueued
    for every URL returned by ``get_news_urls``.

    Otherwise ``url`` is handled as a single page:

    * nothing happens when ``is_exsiting(url)`` is true;
    * for site ``'jwc'`` the page is fetched and passed to ``parse_page``;
    * for any other site the URL is POSTed to the Readability parser API and
      the returned ``title`` / ``content`` are saved as a ``PageContent``.
      A response lacking either key is logged and skipped.

    Args:
        url: Address to fetch. Presumably a unicode string (it is encoded
            with the site's encoding before being form-encoded) -- confirm
            against callers.
        site_id: Key into ``fetch_config``; an unknown key raises KeyError.
        is_index: True when ``url`` is a listing page whose links should be
            queued rather than stored.

    Returns:
        None.
    """
    logging.info('fetching...%s', url)

    site_config = fetch_config[site_id]

    if is_index:
        result = urlfetch.fetch(url)
        # Re-encode the listing page from the site's encoding to UTF-8 bytes
        # before handing it to the link extractor.
        page = result.content.decode(site_config["encoding"]).encode('utf-8')
        for news_url in get_news_urls(site_id, page):
            taskqueue.add(url='/start_fetch', params={'url': news_url, 'site_id': site_id})
        return

    if is_exsiting(url):
        return

    if site_id in ('jwc',):
        result = urlfetch.fetch(url)
        # NOTE(review): the value returned by parse_page() is never used or
        # stored on this path, unlike the Readability path below. Confirm
        # whether 'jwc' pages are meant to be saved as PageContent as well.
        parse_page(result.content)
        return

    # Sample Readability parser API request:
    # http://www.readability.com/api/content/v1/parser?token=<token>&url=http://news.scu.edu.cn/news2012/cdzx/webinfo/2013/03/1343288895583976.htm

    # Encode the URL first so urlencode() does not raise when it contains
    # non-ASCII (e.g. Chinese) characters; ``url`` is unicode by default.
    # NOTE(review): the API token is hard-coded in source; consider moving it
    # to configuration.
    payload = urllib.urlencode({
        "url": url.encode(site_config['encoding']),
        "token": "16208e14fab764c70989011f1f26fc8c71b85451",
    })
    result = urlfetch.fetch("http://www.readability.com/api/content/v1/parser",
                            payload=payload,
                            method=urlfetch.POST,
                            headers={'Content-Type': 'application/x-www-form-urlencoded'})
    contents = json.loads(result.content)

    # Only the extraction of the expected keys is guarded: a missing key
    # means Readability failed to parse the page. Errors raised while
    # building or storing the entity are no longer masked as parse failures.
    try:
        title = contents['title']
        content = unescape(contents['content'])
    except KeyError as e:
        logging.error("Error: %s", e)
        logging.error("url: %s", url)
        logging.error("payload: %s", payload)
        return

    PageContent(url=url, site_id=site_id, title=title, content=content).put()