コード例 #1
0
 def test_case1(self):
     """Download a live page and check that list extraction returns something.

     The page is only passed to ``extract_list`` when ``is_list`` accepts it;
     otherwise the result stays ``None`` and the final assertion fails.

     NOTE: this test performs a real HTTP request, so it needs network access.
     """
     content = requests.get(
         "https://www.qukuaiwang.com.cn/Index/ku_detail/kid/4812.html",
         # requests applies no timeout by default; without one a stalled
         # server would block the whole test run forever.
         timeout=30,
     ).text
     result = extract_list(content) if is_list(content) else None
     print(probability_of_list(content))
     self.assertTrue(
         result,
         'page was not classified as a list or nothing was extracted')
コード例 #2
0
 def test_zhihu_search_result(self):
     """Run ``extract_list`` on 'zhihu_search_result.html' and expect 56 entries."""
     extracted = extract_list(self.html('zhihu_search_result.html'))
     # Render first, then count, so the printed line shows both side by side.
     rendered = jsonify(extracted)
     entry_count = len(extracted)
     print(rendered, entry_count)
     self.assertEqual(entry_count, 56)
コード例 #3
0
 def test_netease_rolling_news(self):
     """Run ``extract_list`` on 'netease_rolling_news.html' and expect 40 entries."""
     extracted = extract_list(self.html('netease_rolling_news.html'))
     # Print the jsonify() rendering of the result before checking its size.
     print(jsonify(extracted))
     entry_count = len(extracted)
     self.assertEqual(entry_count, 40)
コード例 #4
0
 def test_tencent_important_news(self):
     """Run ``extract_list`` on 'tencent_important_news.html' and expect 10 entries."""
     # TODO: this test case is wrong
     extracted = extract_list(self.html('tencent_important_news.html'))
     rendered = jsonify(extracted)
     entry_count = len(extracted)
     print(rendered, entry_count)
     self.assertEqual(entry_count, 10)
コード例 #5
0
 def test_netease_leaderboard_news(self):
     """Run ``extract_list`` on 'netease_leaderboard_news.html' and expect 700 entries."""
     page_source = self.html('netease_leaderboard_news.html')
     extracted = extract_list(page_source)
     # Print the jsonify() rendering of the result before checking its size.
     print(jsonify(extracted))
     entry_count = len(extracted)
     self.assertEqual(entry_count, 700)
コード例 #6
0
 def test_netease_international_news(self):
     """Run ``extract_list`` on 'netease_international_news.html' and expect 7 entries."""
     extracted = extract_list(self.html('netease_international_news.html'))
     # Print the jsonify() rendering of the result before checking its size.
     print(jsonify(extracted))
     entry_count = len(extracted)
     self.assertEqual(entry_count, 7)
コード例 #7
0
 def test_rtfund_xxpl(self):
     """Run ``extract_list`` on 'rtfund_xxpl.html' with a base URL and expect 15 entries."""
     extracted = extract_list(
         self.html('rtfund_xxpl.html'),
         base_url='http://www.rtfund.com/',
     )
     # Print the jsonify() rendering of the result before checking its size.
     print(jsonify(extracted))
     entry_count = len(extracted)
     self.assertEqual(entry_count, 15)
コード例 #8
0
 def test_hsqhfunds_announcement(self):
     """Run ``extract_list`` on 'hsqhfunds_announcement.html' with a base URL; expect 20 entries."""
     extracted = extract_list(
         self.html('hsqhfunds_announcement.html'),
         base_url='https://www.hsqhfunds.com/',
     )
     # Print the jsonify() rendering of the result before checking its size.
     print(jsonify(extracted))
     entry_count = len(extracted)
     self.assertEqual(entry_count, 20)
コード例 #9
0
 def test_dfa66_announcement(self):
     """Run ``extract_list`` on 'dfa66_announcement.html' with a base URL; expect 10 entries."""
     extracted = extract_list(
         self.html('dfa66_announcement.html'),
         base_url='https://www.dfa66.com/',
     )
     # Print the jsonify() rendering of the result before checking its size.
     print(jsonify(extracted))
     entry_count = len(extracted)
     self.assertEqual(entry_count, 10)
コード例 #10
0
 def test_hrfund_announcement(self):
     """Run ``extract_list`` on 'hrfund_announcement.html' with a base URL; expect 10 entries."""
     extracted = extract_list(
         self.html('hrfund_announcement.html'),
         base_url='http://www.hr-fund.com.cn/news',
     )
     # Print the jsonify() rendering of the result before checking its size.
     print(jsonify(extracted))
     entry_count = len(extracted)
     self.assertEqual(entry_count, 10)
コード例 #11
0
ファイル: main.py プロジェクト: yueconger/GerapyAutoExtractor
from gerapy_auto_extractor import extract_list, extract_detail, is_detail, is_list, probability_of_detail, \
    probability_of_list
from gerapy_auto_extractor.helpers import content, jsonify

# Demo 1: run list extraction on the list sample and print the result.
html = content('samples/list/sample.html')
list_result = extract_list(html)
print(jsonify(list_result))

# Demo 2: run detail extraction on the detail sample and print the result.
html = content('samples/detail/sample.html')
detail_result = extract_detail(html)
print(jsonify(detail_result))

# Demo 3: for the detail sample, then the list sample, print both
# probabilities on one line and both boolean classifications on the next.
for sample_path in ('samples/detail/sample.html', 'samples/list/sample.html'):
    html = content(sample_path)
    detail_probability = probability_of_detail(html)
    list_probability = probability_of_list(html)
    print(detail_probability, list_probability)
    looks_like_detail = is_detail(html)
    looks_like_list = is_list(html)
    print(looks_like_detail, looks_like_list)