Example #1
0
print "  <h1>ironnews-crawler status</h1>"

print "  <h2>keywords</h2>"
print "  <table border='1'>"

for keyword in KeywordManager.all():
  print "<tr>"
  print " <td>" + (keyword.updated_at + datetime.timedelta(hours = 9)).strftime("%Y-%m-%d %H:%M") + "</td>"
  print " <td>" + keyword.name.encode("utf-8") + "</td>" # FIXME: HTML escape
  print "</tr>"

print "  </table>"

print "  <h2>latest articles</h2>"

category_stats = ArticleManager.category_stats()
state_stats    = ArticleManager.state_stats()

print "<table border='1'>"
print " <tr>"
print "  <th colspan='3'>カテゴリ</th>"
print "  <th colspan='2'>状態</th>"
print " </tr>"
print " <tr>"
print "  <th>不明</th>"
print "  <th>鉄道</th>"
print "  <th>非鉄</th>"
print "  <th>未登録</th>"
print "  <th>登録済</th>"
print " </tr>"
print " <tr>"
from google_news import GoogleNews

from ironnews_utility import IronnewsUtility

print "Content-Type: text/plain"
print ""

KeywordManager.initialize()

keyword = KeywordManager.get()
print keyword.encode("utf-8")

articles = GoogleNews.search(keyword, 30)
for article in articles:
  url   = article["url"]
  title = article["title"]
  print "---"
  print url
  print title.encode("utf-8")
  if IronnewsUtility.reject(url):
    print "reject!"
    continue

  url2 = IronnewsUtility.get_canonical_url(url)
  if url2 != url:
    print "canonical! " + url2

  ArticleManager.add(url2, title, Article.CATEGORY_RAIL)

KeywordManager.update(keyword)