def test_parse_publication3(self):
    """Check appledaily.parse_publication on snapshot000104-3.html.

    The parsed result must carry the expected title, an empty
    ``publication_text`` and an empty ``image_urls`` list.
    """
    expected_title = "【暖心文】專營「不賺錢路線」 花蓮鳳榮行動超市駛進偏鄉 | 蘋果新聞網 | 蘋果日報 "

    parsed_soups = P.parse_soups(load_snapshot("snapshot000104-3.html"))
    publication = appledaily.parse_publication(parsed_soups)

    self.assertEqual(expected_title, publication["title"])
    self.assertEqual(publication["publication_text"], "")
    self.assertEqual(publication["data"]["image_urls"], [])
def test_parse_published_at(self):
    """Check P.parse_published_at against every entry of ``self.cases``.

    Each case is loaded from the snapshot file named by ``case["f"]`` and the
    parsed value is compared with ``case["published_at"]``.
    """
    for case in self.cases:
        # subTest reports the failing case and keeps the original traceback,
        # instead of catching every exception and re-raising it via
        # self.fail(), which hid the real error location and stopped the
        # loop at the first failing case.
        with self.subTest(case=case):
            snapshot = load_snapshot(case["f"])
            soups = P.parse_soups(snapshot)
            published_at = P.parse_published_at(soups)
            self.assertEqual(case["published_at"], published_at, case["f"])
def test_parse_ga_id(self):
    """Check P.parse_ga_id against every entry of ``self.cases``.

    The comparison is order-insensitive: both ``case["ga-id"]`` and the
    parsed result are converted to sets before being compared.
    """
    for case in self.cases:
        # subTest reports the failing case and keeps the original traceback,
        # instead of catching every exception and re-raising it via
        # self.fail(), which hid the real error location and stopped the
        # loop at the first failing case.
        with self.subTest(case=case):
            snapshot = load_snapshot(case["f"])
            soups = P.parse_soups(snapshot)
            ga_id = P.parse_ga_id(soups)
            self.assertEqual(set(case["ga-id"]), set(ga_id), case["f"])
def test_parse_publication2(self):
    """Check fb.parse_publication on fbsnapshot002.html.

    Verifies the extracted hashtags, publication text, share count and
    linked URLs of the parsed publication.
    """
    snapshot = load_snapshot("fbsnapshot002.html")
    soups = P.parse_soups(snapshot)
    p = fb.parse_publication(soups)

    # assertIn prints both the member and the container on failure, unlike
    # assertTrue(x in y) which only reports "False is not true".
    hashtags = p["data"]["hashtags"]
    self.assertIn("吸毒", hashtags)
    self.assertIn("喪屍", hashtags)
    self.assertEqual("這對夫妻也太沒責任感了吧! # 吸毒 # 喪屍", p["publication_text"])
    self.assertEqual("1", p["data"]["reactions"]["shares"])
    self.assertIn("https://kairos.news/135385", p["data"]["urls"])
def test_parse_publication1(self):
    """Check appledaily.parse_publication on snapshot000104.html.

    Verifies the title, that the publication text starts with and contains
    the expected sentences, and that ``connect_from`` is None.
    """
    snapshot = load_snapshot("snapshot000104.html")
    soups = P.parse_soups(snapshot)
    p = appledaily.parse_publication(soups)

    self.assertEqual("9個QA解答《蘋果》訂閱問題 客服電話電郵看這裡 | 蘋果新聞網 | 蘋果日報 ", p["title"])

    text = p["publication_text"]
    expected_lead = "《蘋果新聞網》每月只要120元,比一碗牛肉麵還便宜,即可無限暢覽;"
    # Same condition as find(...) == 0, but the failure message shows what
    # the text actually starts with instead of "0 != -1".
    self.assertTrue(
        text.startswith(expected_lead),
        f"publication_text starts with {text[:len(expected_lead)]!r}",
    )
    # assertIn reports the searched text on failure, unlike assertTrue(x in y).
    self.assertIn("如有疑問歡迎來電或請來信,我們竭誠為您服務。", text)
    self.assertIsNone(p["connect_from"])
def test_parse_fb_app_id(self):
    """Check the ``fb:app_id`` metatag against every entry of ``self.cases``.

    When the parsed soups have no ``fb:app_id`` metatag, the case is
    expected to declare an empty string.
    """
    for case in self.cases:
        # subTest reports the failing case and keeps the original traceback,
        # instead of catching every exception and re-raising it via
        # self.fail(), which hid the real error location and stopped the
        # loop at the first failing case.
        with self.subTest(case=case):
            snapshot = load_snapshot(case["f"])
            soups = P.parse_soups(snapshot)
            # A missing metatag is treated as "", so one assertion covers
            # both the present and the absent case.
            if "fb:app_id" in soups.metatags:
                fb_app_id = soups.metatags["fb:app_id"]
            else:
                fb_app_id = ""
            self.assertEqual(case["fb:app_id"], fb_app_id, case["f"])
def test_parse_publication2(self):
    """Check appledaily.parse_publication on snapshot000104-2.html.

    Verifies the title, that the publication text starts with and contains
    the expected sentences, and that the expected image URL was extracted.
    """
    snapshot = load_snapshot("snapshot000104-2.html")
    soups = P.parse_soups(snapshot)
    p = appledaily.parse_publication(soups)

    self.assertEqual("【名醫家惡火】賴宅有無裝設住警器 火調報告1個月出爐 | 蘋果新聞網 | 蘋果日報 ", p["title"])

    text = p["publication_text"]
    expected_lead = "高醫前院長賴文德位於高雄住家昨發生惡火,夫妻雖幸運獲救"
    # Same condition as find(...) == 0, but the failure message shows what
    # the text actually starts with instead of "0 != -1".
    self.assertTrue(
        text.startswith(expected_lead),
        f"publication_text starts with {text[:len(expected_lead)]!r}",
    )
    # assertIn reports member and container on failure, unlike
    # assertTrue(x in y) which only reports "False is not true".
    self.assertIn("火場內部隔間裝潢與堆放雜物,才會導致火勢來得迅速猛烈,難以逃生", text)
    self.assertIn(
        "https://arc-photo-appledaily.s3.amazonaws.com/ap-ne-1-prod/public/3XOWF3QRUZJYODGW6YYFO4CLSA.jpg",
        p["data"]["image_urls"],
    )
def test_parse_publication1(self):
    """Check fb.parse_publication on fbsnapshot001.html.

    Verifies that no hashtags are extracted and checks the publication
    text, one expected image URL and the share count.
    """
    snapshot = load_snapshot("fbsnapshot001.html")
    soups = P.parse_soups(snapshot)
    p = fb.parse_publication(soups)

    # assertEqual / assertIn show the offending values on failure, unlike
    # assertTrue(<expr>) which only reports "False is not true".
    self.assertEqual(p["data"]["hashtags"], [])
    self.assertEqual(
        "難怪-為什麼我總是聽不懂總統府的發言,原來是請一個渣男在靠北。 講的畜語害我都聽不懂。 我好生氣喔~ 但請國人別忘記蔡政府要國人吃含瘦肉精豬肉嘿。 花錢又傷身,毒害台灣。",
        p["publication_text"],
    )
    self.assertIn(
        "https://scontent.ftpe8-3.fna.fbcdn.net/v/t1.0-0/p526x296/118825781_3662271600473224_4051157518986893414_o.jpg?_nc_cat=102&_nc_sid=110474&_nc_ohc=pXGE2gwr6BQAX-U0Mo8&_nc_ht=scontent.ftpe8-3.fna&tp=6&oh=2fe62327647a337d3490dfb5accb746c&oe=5F8E2CB2",
        p["data"]["image_urls"],
    )
    self.assertEqual("1K", p["data"]["reactions"]["shares"])
def setUp(self):
    """Parse snapshots/snapshot000015.html into ``self.soups``.

    The file next to this test module is read as text and wrapped in a
    Snapshot with article_type "Article" before being handed to
    P.parse_soups.
    """
    snapshot_path = Path(__file__).parent / "snapshots/snapshot000015.html"
    with open(snapshot_path, "r") as fh:
        html = fh.read()

    snapshot = Snapshot(
        site_id=0,
        url="https://www.toutiao.com/a6822529318262931976/",
        snapshot_at=0,
        first_seen_at=0,
        last_updated_at=0,
        raw_data=html,
        article_type="Article",
    )
    self.soups = P.parse_soups(snapshot)
def setUp(self):
    """Parse snapshots/snapshot000098-pttread.html into ``self.soups``.

    The file next to this test module is read as text and wrapped in a
    Snapshot with article_type "PTT" before being handed to P.parse_soups.
    """
    snapshot_path = (
        Path(__file__).parent / "snapshots/snapshot000098-pttread.html")
    with open(snapshot_path, "r") as fh:
        html = fh.read()

    snapshot = Snapshot(
        site_id=0,
        url="https://www.ptt.cc/bbs/Gossiping/M.1582843734.A.FB5.html",
        snapshot_at=0,
        first_seen_at=0,
        last_updated_at=0,
        raw_data=html,
        article_type="PTT",
    )
    self.soups = P.parse_soups(snapshot)