def test_reject__ng(self):
    """Check that every URL listed in ``filter_cases_ng.txt`` is rejected.

    The fixture is looked up two directories above this module and is read
    one URL per line; surrounding whitespace is stripped from each line
    before it is passed to ``IronnewsUtility.reject``.
    """
    path = os.path.join(
        os.path.dirname(__file__), "..", "..", "filter_cases_ng.txt")
    cases = open(path)
    # try/finally (rather than ``with``) keeps this valid on old Python 2
    # interpreters while still releasing the handle on failure.
    try:
        for line in cases:
            url = line.strip()
            # Compare (url, flag) tuples so a failure message names the URL.
            self.assertEqual(
                (url, True),
                (url, IronnewsUtility.reject(url)))
    finally:
        # Close the fixture even when an assertion fails part-way through.
        cases.close()
def test_get_canonical_url(self):
    """Check ``IronnewsUtility.get_canonical_url`` against a fixture file.

    ``canonical_cases.txt`` is looked up two directories above this module.
    Each stripped line is split on tab characters and must yield exactly
    two fields: the input value and the expected canonical URL.
    """
    path = os.path.join(
        os.path.dirname(__file__), "..", "..", "canonical_cases.txt")
    cases = open(path)
    # try/finally (rather than ``with``) keeps this valid on old Python 2
    # interpreters while still releasing the handle on failure.
    try:
        for line in cases:
            # Unpacking raises ValueError if a line does not hold exactly
            # two tab-separated fields.
            value, expected = re.split(r"\t", line.strip())
            self.assertEqual(
                expected, IronnewsUtility.get_canonical_url(value))
    finally:
        # Close the fixture even when an assertion fails part-way through.
        cases.close()
from google_news import GoogleNews
from ironnews_utility import IronnewsUtility

# Script body: take a keyword from KeywordManager, search GoogleNews for it,
# and pass every hit that is not rejected to ArticleManager.add. Progress is
# reported as plain text via print.
# NOTE(review): the original indentation is not visible in this view; the
# nesting below is the most plausible reading -- confirm against the real
# file, in particular the placement of ArticleManager.add and
# KeywordManager.update.

# Content-Type line followed by an empty line, then the text output.
print "Content-Type: text/plain"
print ""

KeywordManager.initialize()
keyword = KeywordManager.get()
# keyword is encoded to UTF-8 before printing.
print keyword.encode("utf-8")

# presumably 30 is the maximum number of results requested -- TODO confirm
articles = GoogleNews.search(keyword, 30)
for article in articles:
    url = article["url"]
    title = article["title"]
    print "---"
    print url
    print title.encode("utf-8")

    # Rejected URLs are reported and skipped; nothing is stored for them.
    if IronnewsUtility.reject(url):
        print "reject!"
        continue

    # Report the canonical form only when it differs from the found URL.
    url2 = IronnewsUtility.get_canonical_url(url)
    if url2 != url:
        print "canonical! " + url2

    # The canonical URL (not the raw one) is what gets stored.
    ArticleManager.add(url2, title, Article.CATEGORY_RAIL)

# NOTE(review): assumed to run once after the loop -- confirm.
KeywordManager.update(keyword)
io.close()
# NOTE(review): the io.close() call above closes a handle named `io` that is
# opened before this chunk; it appears to be the tail of a definition that is
# not visible here -- confirm its indentation against the real file.

# Script body: pick up to three article URLs at random, submit each one in
# canonical form via add_article, tag the resulting article via add_tag, and
# then call set_registered with the original URL. Progress is reported as
# plain text via print.

# Content-Type line followed by an empty line, then the text output.
print "Content-Type: text/plain"
print ""
print "feeder"

credential = read_credential()
articles = get_articles()
urls = [article.url for article in articles]
# Shuffle so the slice below takes a random selection of at most three URLs.
random.shuffle(urls)

for original_url in urls[0:3]:
    print "---"
    print original_url

    canonical_url = IronnewsUtility.get_canonical_url(original_url)
    print canonical_url

    result1 = add_article(credential, canonical_url)
    print result1

    # The article id is read from result1["result"]["1"]["article_id"]; a
    # response without that path raises here and the remaining steps for
    # this URL are not executed.
    article_id = result1["result"]["1"]["article_id"]
    result2 = add_tag(credential, article_id, u"googleニュース")
    print result2

    # The original (non-canonical) URL is the one passed to set_registered.
    set_registered(original_url)