コード例 #1
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_name_attributes(self):
     # Scrape http://127.0.0.1:8888 for ".hd" elements with
     # name_attributes=["class"] and check that the result carries an 'hd'
     # entry holding 31 values.
     # NOTE(review): presumably matches are filed under the value of the
     # named attribute -- confirm against WebInput.scrape_page.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://127.0.0.1:8888")
     css_selector = selector_validator.to_python(".hd")

     scraped = WebInput.scrape_page(
         target_url,
         css_selector,
         username="******",
         password="******",
         timeout=3,
         name_attributes=["class"],
     )

     self.assertEqual(len(scraped['hd']), 31)
コード例 #2
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_name_attributes_separate_fields(self):
     # Same scrape as the name-attributes test (".hd" on
     # http://127.0.0.1:8888, name_attributes=["class"]) but with
     # output_matches_as_separate_fields=True and output_matches_as_mv=False.
     # The result is then expected to expose a 'match_hd_1' field equal to
     # 'Mode:'.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://127.0.0.1:8888")
     css_selector = selector_validator.to_python(".hd")

     scraped = WebInput.scrape_page(
         target_url,
         css_selector,
         username="******",
         password="******",
         timeout=3,
         name_attributes=["class"],
         output_matches_as_separate_fields=True,
         output_matches_as_mv=False,
     )

     self.assertEqual(scraped['match_hd_1'], 'Mode:')
コード例 #3
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_unavailable_page(self):
     # Scrape http://192.168.30.23/ with a 3 second timeout and check that
     # the result flags the request as timed out.
     # NOTE(review): presumably this address is expected to be unreachable
     # from the test environment -- confirm.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://192.168.30.23/")
     css_selector = selector_validator.to_python(".hero-unit.main_background")

     scraped = WebInput.scrape_page(target_url, css_selector, timeout=3)

     self.assertEqual(scraped['timed_out'], True)
コード例 #4
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_name_attributes_escaped_name(self):
     # Scrape "input" elements from http://127.0.0.1:8888 with
     # name_attributes=["onclick"] and include_empty_matches=True, then check
     # that the result contains the keys 'btnBerTest__' and 'btnReset__'.
     # NOTE(review): presumably these keys are the onclick values after
     # being escaped into field names -- confirm against WebInput.scrape_page.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_field = URLField( "test_web_input", "title", "this is a test" )
     selector_field = SelectorField( "test_web_input_css", "title", "this is a test" )
     result = WebInput.scrape_page( url_field.to_python("http://127.0.0.1:8888"), selector_field.to_python("input"), username="******", password="******", timeout=3, name_attributes=["onclick"], include_empty_matches=True)

     # assertIn reports the missing key and the container on failure, unlike
     # assertTrue(x in y) which only reports "False is not true".
     self.assertIn('btnBerTest__', result)
     self.assertIn('btnReset__', result)
コード例 #5
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_with_invalid_credentials(self):
     # Scrape "tr" elements from http://127.0.0.1:8888 without supplying a
     # username or password and check that no matches come back.
     # NOTE(review): despite the test name, no credentials (valid or
     # invalid) are passed here -- confirm this is the intended scenario.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://127.0.0.1:8888")
     css_selector = selector_validator.to_python("tr")

     scraped = WebInput.scrape_page(
         target_url,
         css_selector,
         timeout=3,
         output_matches_as_mv=True,
     )

     self.assertEqual(len(scraped['match']), 0)
コード例 #6
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_unparsable(self):
     # Point the scraper at a PNG image URL and check that the list of
     # matches is empty.
     # NOTE(review): presumably the point is that non-HTML content yields
     # no matches rather than an error -- confirm.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://textcritical.net/media/images/link_external.png")
     css_selector = selector_validator.to_python(".hero-unit .main_background")

     scraped = WebInput.scrape_page(
         target_url,
         css_selector,
         timeout=3,
         output_matches_as_mv=True,
     )
     self.assertEqual(scraped['match'], [])
コード例 #7
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_encoding_detect_meta(self):
     # Scrape with only the "meta" charset detection flag enabled (the
     # content-type-header and sniffing flags are switched off) and check
     # that the response is a 200 reported as "utf-8".

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://textcritical.net/work/new-testament/Mark/1/2")
     css_selector = selector_validator.to_python(".verse-container")

     scraped = WebInput.scrape_page(
         target_url,
         css_selector,
         charset_detect_meta_enabled=True,
         charset_detect_content_type_header_enabled=False,
         charset_detect_sniff_enabled=False,
     )
     self.assertEqual(scraped['response_code'], 200)
     self.assertEqual(scraped['encoding'], "utf-8")
コード例 #8
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page(self):
     # Scrape http://textcritical.net/ for ".hero-unit.main_background"
     # using the default scrape options and check for a 200 response with
     # a 'match' entry of length 1.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://textcritical.net/")
     css_selector = selector_validator.to_python(".hero-unit.main_background")

     scraped = WebInput.scrape_page(target_url, css_selector)
     self.assertEqual(scraped['response_code'], 200)
     self.assertEqual(len(scraped['match']), 1)
コード例 #9
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_adjacent_selector(self):
     # Regression test for http://lukemurphey.net/issues/773
     # Uses a selector group containing an adjacent-sibling combinator
     # ("h1+p,.sharing-buttons") and expects exactly two matches.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://textcritical.net/")
     css_selector = selector_validator.to_python("h1+p,.sharing-buttons")

     scraped = WebInput.scrape_page(
         target_url,
         css_selector,
         timeout=3,
         output_matches_as_mv=True,
     )
     self.assertEqual(len(scraped['match']), 2)
コード例 #10
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_bad_encoding(self):
     # Regression test for http://lukemurphey.net/issues/987
     # Scrapes "description" elements from the Slashdot RSS feed and expects
     # a 200 response, at least one match, and a reported encoding of
     # "ISO-8859-1".

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://rss.slashdot.org/Slashdot/slashdot")
     css_selector = selector_validator.to_python("description")

     scraped = WebInput.scrape_page(target_url, css_selector)
     self.assertEqual(scraped['response_code'], 200)
     self.assertGreater(len(scraped['match']), 0)
     self.assertEqual(scraped['encoding'], "ISO-8859-1")
コード例 #11
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_encoding_detect_page(self):
     # Scrape ".verse-container" elements from a textcritical.net page with
     # default options and verify the response code, the number of matches,
     # the (NFC-normalised) text of the second match, and that the reported
     # encoding is "utf-8".

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://textcritical.net/work/new-testament/Mark/1/2?async")
     css_selector = selector_validator.to_python(".verse-container")

     scraped = WebInput.scrape_page(target_url, css_selector)
     self.assertEqual(scraped['response_code'], 200)
     self.assertEqual(len(scraped['match']), 45)

     # Normalise both sides to NFC so the comparison does not depend on how
     # the accented Greek characters happen to be composed.
     actual_verse = unicodedata.normalize('NFC', scraped['match'][1])
     expected_verse = unicodedata.normalize('NFC', u"2 Καθὼς γέγραπται ἐν τῷ Ἠσαίᾳ τῷ προφήτῃ Ἰδοὺ ἀποστέλλω τὸν ἄγγελόν μου πρὸ προσώπου σου , ὃς κατασκευάσει τὴν ὁδόν σου :")
     self.assertEqual(actual_verse, expected_verse)
     self.assertEqual(scraped['encoding'], "utf-8")
コード例 #12
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_child_text(self):
     # This test ensures that text from nodes nested under the selected
     # node is extracted: the single match must equal the expected text.

     # NOTE(review): this instance is never referenced below; kept in case
     # constructing a WebInput has side effects the scrape relies on.
     web_input = WebInput(timeout=3)

     url_validator = URLField("test_web_input", "title", "this is a test")
     selector_validator = SelectorField("test_web_input_css", "title", "this is a test")

     target_url = url_validator.to_python("http://textcritical.net/")
     css_selector = selector_validator.to_python(".hero-unit.main_background")

     scraped = WebInput.scrape_page(target_url, css_selector, output_matches_as_mv=True)
     self.assertEqual(scraped['response_code'], 200)
     self.assertEqual(len(scraped['match']), 1)

     first_match = scraped['match'][0]
     self.assertEqual(first_match, "Ancient Greek, Modern Design TextCritical.net is a website that provides a library of ancient Greek works")
コード例 #13
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_scrape_page_mv(self):
     # Scrape the "h2" elements of http://textcritical.net/ with
     # output_matches_as_mv=True, expect three matches, then write the
     # result through output_event into an in-memory buffer and check that
     # "match=" appears three times in the emitted text.
     web_input = WebInput(timeout=3)

     url_field = URLField( "test_web_input", "title", "this is a test" )
     selector_field = SelectorField( "test_web_input_css", "title", "this is a test" )
     result = WebInput.scrape_page( url_field.to_python("http://textcritical.net/"), selector_field.to_python("h2"), output_matches_as_mv=True )
     self.assertEqual(result['response_code'], 200)
     self.assertEqual(len(result['match']), 3)

     # Capture the event output in memory so it can be inspected.
     out = StringIO()
     web_input.output_event(result, stanza="web_input://textcritical_net", index="main", source="test_web_input", sourcetype="sourcetype", out=out)

     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual( len(re.findall("match=", out.getvalue())), 3)
コード例 #14
0
ファイル: unit.py プロジェクト: J-C-B/splunk-web-input
 def test_input_timeout(self):
     # Scrape https://192.168.30.23/ with a 3 second timeout and check that
     # the result flags the request as timed out.
     # NOTE(review): presumably this address is expected to be unreachable
     # from the test environment -- confirm.
     url_field = URLField( "test_input_timeout", "title", "this is a test" )
     selector_field = SelectorField( "test_web_input_css", "title", "this is a test" )
     result = WebInput.scrape_page( url_field.to_python("https://192.168.30.23/"), selector_field.to_python("div"), timeout=3 )

     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(result['timed_out'], True)