Ejemplo n.º 1
0
 def test_create_with_node(self):
     """Build a PyCrawl from a parsed lxml node and compare its outer text.

     Newlines and spaces are removed from both sides before comparing, so
     only the remaining characters have to match.
     """
     import lxml.html

     def squash(markup):
         # Drop every newline and space character.
         return re.sub(r"[\n ]", "", markup)

     parsed = lxml.html.fromstring(self.sample_html)
     crawler = PyCrawl(node=parsed)
     self.assertEqual(squash(self.sample_html), squash(crawler.outer_text()))
Ejemplo n.º 2
0
 def test_submit_form(self):
     """Send a value for "Comments", submit via POST, and read it back.

     NOTE(review): the crawler is created from a remote URL, so this test
     presumably needs network access -- confirm.
     """
     crawler = PyCrawl(url="https://jkorpela.fi/forms/testing.html")
     crawler.send(name="Comments", value="hello")
     # Left disabled: crawler.send(name="box", selected=True)
     crawler.submit(method="POST")

     echoed = crawler.table["Comments"]
     self.assertEqual("hello", echoed)
Ejemplo n.º 3
0
 def test_extract_table(self):
     """Check the "name" and "age" entries of the table built from the sample HTML."""
     crawler = PyCrawl(html=self.sample_html)
     expectations = (("name", "Alice"), ("age", "20"))
     for key, wanted in expectations:
         # Read ``table`` afresh for each key, one lookup per assertion.
         self.assertEqual(wanted, crawler.table[key])
Ejemplo n.º 4
0
 def test_find_node_attr(self):
     """Check that the ``href`` attribute of the "a" selection equals the sample URL."""
     crawler = PyCrawl(html=self.sample_html)
     anchor = crawler.css("a")
     href = anchor.attr("href")
     self.assertEqual(self.sample_url, href)
Ejemplo n.º 5
0
 def test_find_deep_node(self):
     """Chain two CSS queries ("div" then "p") and check the inner text.

     The un-indexed result and index 0 are both expected to yield
     "sample text 5"; index 1 is expected to yield "sample text 6".
     """
     crawler = PyCrawl(html=self.sample_html)

     def nested_paragraphs():
         # Run the chained query again for every assertion.
         return crawler.css("div").css("p")

     self.assertEqual("sample text 5", nested_paragraphs().inner_text())
     for position, wanted in enumerate(("sample text 5", "sample text 6")):
         self.assertEqual(wanted, nested_paragraphs()[position].inner_text())
Ejemplo n.º 6
0
 def test_find_node_with_attr(self):
     """Select nodes by an id selector and a class selector and check their inner text."""
     crawler = PyCrawl(html=self.sample_html)
     cases = (
         ("#test_id", "sample text 3"),
         (".test_class", "sample text 4"),
     )
     for selector, wanted in cases:
         self.assertEqual(wanted, crawler.css(selector).inner_text())
Ejemplo n.º 7
0
 def test_find_node_with_xpath(self):
     """Check that the XPath "/html/body/p" yields a result of length 4."""
     crawler = PyCrawl(html=self.sample_html)
     matches = crawler.xpath("/html/body/p")
     self.assertEqual(4, len(matches))
Ejemplo n.º 8
0
 def test_find_node_with_css(self):
     """Check that the CSS selector "p" yields a result of length 6."""
     crawler = PyCrawl(html=self.sample_html)
     matches = crawler.css("p")
     self.assertEqual(6, len(matches))
Ejemplo n.º 9
0
 def test_send_params(self):
     """Call ``send`` three times for the same id and expect three entries in ``params``."""
     crawler = PyCrawl(html=self.sample_html)
     payloads = (
         {"value": "hello"},
         {"check": True},
         {"file_name": "tests/sample.html"},
     )
     for extra in payloads:
         # Each call targets the same id with a different keyword argument.
         crawler.send(id="test id", **extra)
     self.assertEqual(3, len(crawler.params))
Ejemplo n.º 10
0
 def test_create_error(self):
     """Check that constructing PyCrawl with no arguments raises an exception."""
     # Callable form of assertRaises: invokes PyCrawl() and expects it to raise.
     self.assertRaises(Exception, PyCrawl)
Ejemplo n.º 11
0
 def test_create_with_html(self):
     """Check that a PyCrawl built from an HTML string exposes that string as ``html``."""
     source_markup = self.sample_html
     crawler = PyCrawl(html=source_markup)
     self.assertEqual(self.sample_html, crawler.html)
Ejemplo n.º 12
0
 def test_create_with_url(self):
     """Check that a PyCrawl built from a positional URL exposes it as ``url``."""
     target = self.sample_url
     # The URL is passed positionally, as the first constructor argument.
     crawler = PyCrawl(target)
     self.assertEqual(self.sample_url, crawler.url)