def test_zhihu_search_result(self):
     """Run extract_list on the 'zhihu_search_result.html' fixture.

     Prints the jsonify() rendering of the result together with its length,
     then asserts that the result has length 56.
     """
     page = self.html('zhihu_search_result.html')
     items = extract_list(page)
     # Render first, then measure, matching the evaluation order of the print.
     rendered = jsonify(items)
     print(rendered, len(items))
     self.assertEqual(len(items), 56)
 def test_netease_rolling_news(self):
     """Run extract_list on the 'netease_rolling_news.html' fixture.

     Prints the jsonify() rendering of the result and asserts that the
     result has length 40.
     """
     page = self.html('netease_rolling_news.html')
     items = extract_list(page)
     rendered = jsonify(items)
     print(rendered)
     self.assertEqual(len(items), 40)
 def test_tencent_important_news(self):
     """Run extract_list on the 'tencent_important_news.html' fixture.

     Prints the jsonify() rendering of the result together with its length,
     then asserts that the result has length 10.
     """
     # TODO: this test case is wrong
     page = self.html('tencent_important_news.html')
     items = extract_list(page)
     # Render first, then measure, matching the evaluation order of the print.
     rendered = jsonify(items)
     print(rendered, len(items))
     self.assertEqual(len(items), 10)
 def test_netease_leaderboard_news(self):
     """Run extract_list on the 'netease_leaderboard_news.html' fixture.

     Prints the jsonify() rendering of the result and asserts that the
     result has length 700.
     """
     page = self.html('netease_leaderboard_news.html')
     items = extract_list(page)
     rendered = jsonify(items)
     print(rendered)
     self.assertEqual(len(items), 700)
 def test_netease_international_news(self):
     """Run extract_list on the 'netease_international_news.html' fixture.

     Prints the jsonify() rendering of the result and asserts that the
     result has length 7.
     """
     page = self.html('netease_international_news.html')
     items = extract_list(page)
     rendered = jsonify(items)
     print(rendered)
     self.assertEqual(len(items), 7)
 def test_rtfund_xxpl(self):
     """Run extract_list on the 'rtfund_xxpl.html' fixture with a base URL.

     The extractor is called with base_url='http://www.rtfund.com/'. The
     jsonify() rendering of the result is printed and the result is
     asserted to have length 15.
     """
     page = self.html('rtfund_xxpl.html')
     items = extract_list(page, base_url='http://www.rtfund.com/')
     rendered = jsonify(items)
     print(rendered)
     self.assertEqual(len(items), 15)
 def test_hsqhfunds_announcement(self):
     """Run extract_list on the 'hsqhfunds_announcement.html' fixture.

     The extractor is called with base_url='https://www.hsqhfunds.com/'.
     The jsonify() rendering of the result is printed and the result is
     asserted to have length 20.
     """
     page = self.html('hsqhfunds_announcement.html')
     items = extract_list(page, base_url='https://www.hsqhfunds.com/')
     rendered = jsonify(items)
     print(rendered)
     self.assertEqual(len(items), 20)
 def test_dfa66_announcement(self):
     """Run extract_list on the 'dfa66_announcement.html' fixture.

     The extractor is called with base_url='https://www.dfa66.com/'. The
     jsonify() rendering of the result is printed and the result is
     asserted to have length 10.
     """
     page = self.html('dfa66_announcement.html')
     items = extract_list(page, base_url='https://www.dfa66.com/')
     rendered = jsonify(items)
     print(rendered)
     self.assertEqual(len(items), 10)
 def test_hrfund_announcement(self):
     """Run extract_list on the 'hrfund_announcement.html' fixture.

     The extractor is called with base_url='http://www.hr-fund.com.cn/news'.
     The jsonify() rendering of the result is printed and the result is
     asserted to have length 10.
     """
     page = self.html('hrfund_announcement.html')
     items = extract_list(page, base_url='http://www.hr-fund.com.cn/news')
     rendered = jsonify(items)
     print(rendered)
     self.assertEqual(len(items), 10)
예제 #10
0
from gerapy_auto_extractor import extract_list, extract_detail, is_detail, is_list, probability_of_detail, \
    probability_of_list
from gerapy_auto_extractor.helpers import content, jsonify

# List extraction: read the list sample and print the jsonify() rendering
# of what extract_list returns for it.
html = content('samples/list/sample.html')
list_result = extract_list(html)
print(jsonify(list_result))

# Detail extraction: same flow with the detail sample and extract_detail.
html = content('samples/detail/sample.html')
detail_result = extract_detail(html)
print(jsonify(detail_result))

# Classification: for the detail sample first and the list sample second,
# print both probability scores and then both boolean verdicts. Each sample
# is re-read through content() before it is scored.
for sample_path in ('samples/detail/sample.html', 'samples/list/sample.html'):
    html = content(sample_path)
    print(probability_of_detail(html), probability_of_list(html))
    print(is_detail(html), is_list(html))