Example #1
0
 def test_parse_publication3(self):
     """Check the appledaily parser on ``snapshot000104-3.html``.

     The parsed publication is expected to carry the page title while
     having empty publication text and no image URLs.
     """
     expected_title = "【暖心文】專營「不賺錢路線」 花蓮鳳榮行動超市駛進偏鄉 | 蘋果新聞網 | 蘋果日報 "
     publication = appledaily.parse_publication(
         P.parse_soups(load_snapshot("snapshot000104-3.html")))
     self.assertEqual(expected_title, publication["title"])
     self.assertEqual(publication["publication_text"], "")
     self.assertEqual(publication["data"]["image_urls"], [])
Example #2
0
 def test_parse_published_at(self):
     """Check ``P.parse_published_at`` against every entry of ``self.cases``.

     Each case names a snapshot file under ``"f"`` and the expected value
     under ``"published_at"``.
     """
     for case in self.cases:
         # subTest attaches the case to any failure or error and keeps the
         # original traceback, instead of catching every exception and
         # re-raising it through self.fail().
         with self.subTest(case=case):
             snapshot = load_snapshot(case["f"])
             soups = P.parse_soups(snapshot)
             published_at = P.parse_published_at(soups)
             self.assertEqual(case["published_at"], published_at, case["f"])
Example #3
0
 def test_parse_ga_id(self):
     """Check ``P.parse_ga_id`` against every entry of ``self.cases``.

     Expected and parsed ids are compared as sets, so order and duplicates
     are ignored.
     """
     for case in self.cases:
         # subTest attaches the case to any failure or error and keeps the
         # original traceback, instead of catching every exception and
         # re-raising it through self.fail().
         with self.subTest(case=case):
             snapshot = load_snapshot(case["f"])
             soups = P.parse_soups(snapshot)
             ga_id = P.parse_ga_id(soups)
             self.assertEqual(set(case["ga-id"]), set(ga_id), case["f"])
Example #4
0
 def test_parse_publication2(self):
     """Check the fb parser on ``fbsnapshot002.html``.

     Verifies the hashtags, the publication text, the share count and one
     extracted URL.
     """
     snapshot = load_snapshot("fbsnapshot002.html")
     soups = P.parse_soups(snapshot)
     p = fb.parse_publication(soups)
     # assertIn reports both the member and the container on failure,
     # unlike assertTrue(x in y) which only says "False is not true".
     self.assertIn("吸毒", p["data"]["hashtags"])
     self.assertIn("喪屍", p["data"]["hashtags"])
     self.assertEqual("這對夫妻也太沒責任感了吧! # 吸毒 # 喪屍", p["publication_text"])
     self.assertEqual("1", p["data"]["reactions"]["shares"])
     self.assertIn("https://kairos.news/135385", p["data"]["urls"])
Example #5
0
 def test_parse_publication1(self):
     """Check the appledaily parser on ``snapshot000104.html``.

     Verifies the title, the start of the publication text, a sentence
     contained in it, and that ``connect_from`` is ``None``.
     """
     snapshot = load_snapshot("snapshot000104.html")
     soups = P.parse_soups(snapshot)
     p = appledaily.parse_publication(soups)
     self.assertEqual("9個QA解答《蘋果》訂閱問題 客服電話電郵看這裡 | 蘋果新聞網 | 蘋果日報 ",
                      p["title"])
     # find(...) == 0 means the text starts with the given sentence.
     self.assertEqual(
         0, p["publication_text"].find("《蘋果新聞網》每月只要120元,比一碗牛肉麵還便宜,即可無限暢覽;"))
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn("如有疑問歡迎來電或請來信,我們竭誠為您服務。", p["publication_text"])
     self.assertIsNone(p["connect_from"])
Example #6
0
 def test_parse_fb_app_id(self):
     """Check the ``fb:app_id`` metatag for every entry of ``self.cases``.

     When the parsed metatags have no ``fb:app_id`` entry, the case is
     expected to list an empty string.
     """
     for case in self.cases:
         # subTest attaches the case to any failure or error and keeps the
         # original traceback, instead of catching every exception and
         # re-raising it through self.fail().
         with self.subTest(case=case):
             snapshot = load_snapshot(case["f"])
             soups = P.parse_soups(snapshot)
             # A missing metatag is treated as the empty string.
             if "fb:app_id" in soups.metatags:
                 fb_app_id = soups.metatags["fb:app_id"]
             else:
                 fb_app_id = ""
             self.assertEqual(case["fb:app_id"], fb_app_id, case["f"])
Example #7
0
 def test_parse_publication2(self):
     """Check the appledaily parser on ``snapshot000104-2.html``.

     Verifies the title, the start of the publication text, a sentence
     contained in it, and one extracted image URL.
     """
     snapshot = load_snapshot("snapshot000104-2.html")
     soups = P.parse_soups(snapshot)
     p = appledaily.parse_publication(soups)
     self.assertEqual("【名醫家惡火】賴宅有無裝設住警器 火調報告1個月出爐 | 蘋果新聞網 | 蘋果日報 ",
                      p["title"])
     # find(...) == 0 means the text starts with the given sentence.
     self.assertEqual(
         0, p["publication_text"].find("高醫前院長賴文德位於高雄住家昨發生惡火,夫妻雖幸運獲救"))
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn(
         "火場內部隔間裝潢與堆放雜物,才會導致火勢來得迅速猛烈,難以逃生",
         p["publication_text"])
     self.assertIn(
         "https://arc-photo-appledaily.s3.amazonaws.com/ap-ne-1-prod/public/3XOWF3QRUZJYODGW6YYFO4CLSA.jpg",
         p["data"]["image_urls"])
Example #8
0
 def test_parse_publication1(self):
     """Check the fb parser on ``fbsnapshot001.html``.

     Verifies that there are no hashtags, and checks the publication text,
     one extracted image URL and the share count.
     """
     snapshot = load_snapshot("fbsnapshot001.html")
     soups = P.parse_soups(snapshot)
     p = fb.parse_publication(soups)
     # assertEqual / assertIn show the actual values on failure, unlike
     # assertTrue(...) which only reports "False is not true".
     self.assertEqual(p["data"]["hashtags"], [])
     self.assertEqual(
         "難怪-為什麼我總是聽不懂總統府的發言,原來是請一個渣男在靠北。 講的畜語害我都聽不懂。 我好生氣喔~ 但請國人別忘記蔡政府要國人吃含瘦肉精豬肉嘿。 花錢又傷身,毒害台灣。",
         p["publication_text"],
     )
     self.assertIn(
         "https://scontent.ftpe8-3.fna.fbcdn.net/v/t1.0-0/p526x296/118825781_3662271600473224_4051157518986893414_o.jpg?_nc_cat=102&_nc_sid=110474&_nc_ohc=pXGE2gwr6BQAX-U0Mo8&_nc_ht=scontent.ftpe8-3.fna&tp=6&oh=2fe62327647a337d3490dfb5accb746c&oe=5F8E2CB2",
         p["data"]["image_urls"])
     self.assertEqual("1K", p["data"]["reactions"]["shares"])
Example #9
0
 def setUp(self):
     """Load ``snapshots/snapshot000015.html`` and parse it into ``self.soups``.

     The file is wrapped in a ``Snapshot`` of article type ``"Article"``
     with zeroed ids and timestamps before being handed to
     ``P.parse_soups``.
     """
     fixture = Path(__file__).parent / "snapshots/snapshot000015.html"
     # Decode explicitly so the result does not depend on the platform's
     # locale encoding. NOTE(review): assumes the fixture is stored as
     # UTF-8 -- confirm.
     with open(fixture, "r", encoding="utf-8") as fh:
         self.soups = P.parse_soups(
             Snapshot(
                 site_id=0,
                 url="https://www.toutiao.com/a6822529318262931976/",
                 snapshot_at=0,
                 first_seen_at=0,
                 last_updated_at=0,
                 raw_data=fh.read(),
                 article_type="Article",
             ))
Example #10
0
 def setUp(self):
     """Load ``snapshots/snapshot000098-pttread.html`` into ``self.soups``.

     The file is wrapped in a ``Snapshot`` of article type ``"PTT"`` with
     zeroed ids and timestamps before being handed to ``P.parse_soups``.
     """
     fixture = (Path(__file__).parent /
                "snapshots/snapshot000098-pttread.html")
     # Decode explicitly so the result does not depend on the platform's
     # locale encoding. NOTE(review): assumes the fixture is stored as
     # UTF-8 -- confirm.
     with open(fixture, "r", encoding="utf-8") as fh:
         self.soups = P.parse_soups(
             Snapshot(
                 site_id=0,
                 url=
                 "https://www.ptt.cc/bbs/Gossiping/M.1582843734.A.FB5.html",
                 snapshot_at=0,
                 first_seen_at=0,
                 last_updated_at=0,
                 raw_data=fh.read(),
                 article_type="PTT",
             ))