import cgi  # Python 2 stdlib; cgi.escape() HTML-escapes the keyword names below

# Offset added to every keyword timestamp before it is formatted.
# NOTE(review): presumably updated_at is stored in UTC and +9h renders it as
# JST -- confirm against the model definition.
display_offset = datetime.timedelta(hours=9)

# --- Section 1: one table row per keyword (last update time, name) ---------
print(" <h1>ironnews-crawler status</h1>")
print(" <h2>keywords</h2>")
print(" <table border='1'>")
for keyword in KeywordManager.all():
    updated_text = (keyword.updated_at + display_offset).strftime("%Y-%m-%d %H:%M")
    # Keyword names are free text, so escape &, < and > before embedding them
    # in markup; escape while still unicode, then encode once for output.
    name_html = cgi.escape(keyword.name).encode("utf-8")
    print("<tr>")
    print(" <td>" + updated_text + "</td>")
    print(" <td>" + name_html + "</td>")
    print("</tr>")
print(" </table>")

# --- Section 2: article statistics table ----------------------------------
print(" <h2>latest articles</h2>")

# Fetched here, before the table is emitted; not consumed within this part of
# the script.
category_stats = ArticleManager.category_stats()
state_stats = ArticleManager.state_stats()

# Two header rows: group captions ("category" spanning three columns, "state"
# spanning two), then the individual column captions.
print("<table border='1'>")
print(" <tr>")
print(" <th colspan='3'>カテゴリ</th>")
print(" <th colspan='2'>状態</th>")
print(" </tr>")
print(" <tr>")
print(" <th>不明</th>")
print(" <th>鉄道</th>")
print(" <th>非鉄</th>")
print(" <th>未登録</th>")
print(" <th>登録済</th>")
print(" </tr>")
print(" <tr>")
from google_news import GoogleNews
from ironnews_utility import IronnewsUtility

# Plain-text response: header line, then the blank line that ends the headers.
print("Content-Type: text/plain")
print("")

# Obtain the keyword for this run and echo it.
KeywordManager.initialize()
keyword = KeywordManager.get()
print(keyword.encode("utf-8"))

# Walk the search results, logging each one and storing those not rejected.
# NOTE(review): 30 is presumably the maximum number of results -- confirm
# against GoogleNews.search.
for entry in GoogleNews.search(keyword, 30):
    source_url, headline = entry["url"], entry["title"]

    print("---")
    print(source_url)
    print(headline.encode("utf-8"))

    if IronnewsUtility.reject(source_url):
        print("reject!")
    else:
        canonical_url = IronnewsUtility.get_canonical_url(source_url)
        # Only log the canonical form when it differs from what was found.
        if canonical_url != source_url:
            print("canonical! " + canonical_url)
        ArticleManager.add(canonical_url, headline, Article.CATEGORY_RAIL)

# Runs once, after every result has been handled.
KeywordManager.update(keyword)