def test_zhihu_search_result(self):
    html = self.html('zhihu_search_result.html')
    result = extract_list(html)
    print(jsonify(result), len(result))
    self.assertEqual(len(result), 56)

def test_netease_rolling_news(self):
    html = self.html('netease_rolling_news.html')
    result = extract_list(html)
    print(jsonify(result))
    self.assertEqual(len(result), 40)

def test_tencent_important_news(self):
    # TODO: this test case is wrong
    html = self.html('tencent_important_news.html')
    result = extract_list(html)
    print(jsonify(result), len(result))
    self.assertEqual(len(result), 10)

def test_netease_leaderboard_news(self):
    html = self.html('netease_leaderboard_news.html')
    result = extract_list(html)
    print(jsonify(result))
    self.assertEqual(len(result), 700)

def test_netease_international_news(self):
    html = self.html('netease_international_news.html')
    result = extract_list(html)
    print(jsonify(result))
    self.assertEqual(len(result), 7)

def test_rtfund_xxpl(self):
    html = self.html('rtfund_xxpl.html')
    result = extract_list(html, base_url='http://www.rtfund.com/')
    print(jsonify(result))
    self.assertEqual(len(result), 15)

def test_hsqhfunds_announcement(self):
    html = self.html('hsqhfunds_announcement.html')
    result = extract_list(html, base_url='https://www.hsqhfunds.com/')
    print(jsonify(result))
    self.assertEqual(len(result), 20)

def test_dfa66_announcement(self):
    html = self.html('dfa66_announcement.html')
    result = extract_list(html, base_url='https://www.dfa66.com/')
    print(jsonify(result))
    self.assertEqual(len(result), 10)

def test_hrfund_announcement(self):
    html = self.html('hrfund_announcement.html')
    result = extract_list(html, base_url='http://www.hr-fund.com.cn/news')
    print(jsonify(result))
    self.assertEqual(len(result), 10)
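These methods are written against a unittest.TestCase subclass that provides a self.html() helper for loading saved fixture pages; the snippet omits that scaffolding. A minimal sketch of what it could look like, assuming the fixtures live in a samples/list directory next to the test file (the class name, directory layout, and helper implementation are illustrative assumptions, not the project's actual code):

import unittest
from os.path import dirname, join

from gerapy_auto_extractor import extract_list, jsonify


class TestExtractList(unittest.TestCase):
    # assumed location of the saved HTML fixture pages
    samples_dir = join(dirname(__file__), 'samples', 'list')

    def html(self, file_name):
        # read a fixture page from the samples directory as UTF-8 text
        with open(join(self.samples_dir, file_name), encoding='utf-8') as f:
            return f.read()

    # ... the test methods above go here ...


if __name__ == '__main__':
    unittest.main()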
Example #10
from gerapy_auto_extractor import extract_list, extract_detail, jsonify

# run list extraction on a saved list (index) page
html = open('samples/list/sample.html', encoding='utf-8').read()
print(jsonify(extract_list(html)))

# run detail extraction on a saved article (content) page
html = open('samples/content/sample.html', encoding='utf-8').read()
print(jsonify(extract_detail(html)))