def test_simplehttpserver(self):
    """Fetch the root path and a non-existent page; expect 200 then 404."""
    client = Silk(self.io_loop)
    # Each pair is (path passed to LOCAL_URL, status code the response must carry).
    expectations = (
        ('/', 200),
        ('thisdoesnotexist.html', 404),
    )
    for path, expected_code in expectations:
        client.get(LOCAL_URL % (LOCAL_PORT, path), self.stop)
        reply = self.wait()
        self.assertEqual(reply.code, expected_code)
def test_parse(self):
    """Fetch index.html, run the '//text()' XPath over it and check the result.

    The parse callback is expected to deliver a list of text nodes whose
    concatenation contains the word 'test'.
    """
    s = Silk(self.io_loop)
    s.get(LOCAL_URL % (LOCAL_PORT, 'index.html'), self.stop)
    response = self.wait()

    s.parse('//text()', response, self.stop)
    xpath_elements = self.wait()

    # Check the container type directly. Wrapping a comparison in type()
    # yields the bool class, which is always truthy and verifies nothing.
    self.assertIsInstance(xpath_elements, list)

    text_string = ''.join(xpath_elements)
    self.assertIn('test', text_string)
def test__find_urls(self):
    """Fetch index.html and pass the response to Spider._find_urls.

    The value delivered to the callback must contain the expected entry.
    """
    fetcher = Silk(
        self.io_loop,
        allowed_domains=['www.dmoz.org'],
        fail_silent=False,
    )
    page_url = LOCAL_URL % (LOCAL_PORT, 'index.html')
    fetcher.get(page_url, self.stop)
    page = self.wait()

    crawler = Spider()
    crawler._find_urls(page, self.stop)
    found_links = self.wait()

    # NOTE(review): this asserts that the two-element list itself is a member
    # of the result, not that each URL is individually present -- confirm
    # against the shape _find_urls actually hands to its callback.
    expected_entry = ['http://www.google.com', 'page1.html']
    self.assertIn(expected_entry, found_links)
def test_debug_setting(self):
    """
    Test that with debug=True files are being saved to the local disk.

    The page is fetched once, then read back through get_local_file; both
    bodies must match. The local copy is removed afterwards.
    """
    url = LOCAL_URL % (LOCAL_PORT, 'index.html')
    s = Silk(self.io_loop, debug=True)

    s.get(url, self.stop)
    response = self.wait()

    s.get_local_file(url, self.stop)
    cached_response = self.wait()

    try:
        self.assertEqual(response.body, cached_response.body)
    finally:
        # Remove the local copy even when the comparison fails, so a failing
        # run does not leave a stale file behind for later runs.
        s.delete_local_file(url)
def test_multiple_domains(self):
    """Fetch one page from each of two allowed domains and check each body."""
    allowed = ['www.dmoz.org', 'www.google.com']
    client = Silk(self.io_loop, allowed_domains=allowed)

    # Each pair is (URL to fetch, text that must appear in the response body).
    checks = (
        ('http://www.dmoz.org', "dmoz"),
        ('http://www.google.com', "Google"),
    )
    for url, marker in checks:
        client.get(url, self.stop)
        reply = self.wait()
        self.assertIn(marker, reply.body)
def test_domains_single_domain(self):
    """Allow only the 127.0.0.1:<LOCAL_PORT> domain.

    A fetch of index.html via LOCAL_URL must return content, while a fetch
    of google.com must come back with an empty body.
    """
    local_domain = '127.0.0.1:%s' % (LOCAL_PORT)
    client = Silk(self.io_loop, allowed_domains=[local_domain])

    local_url = LOCAL_URL % (LOCAL_PORT, 'index.html')
    client.get(local_url, self.stop)
    local_reply = self.wait()
    self.assertIn("test paragraph", local_reply.body)

    client.get('http://google.com', self.stop)
    external_reply = self.wait()
    # Silently fails and returns an empty body
    self.assertEqual(external_reply.body, '')
def test_domains_fail_loudly(self):
    """With fail_silent=False, a fetch outside allowed_domains must raise.

    A fetch of the allowed domain is checked first, then a fetch of
    google.com is required to raise ExternalDomainError.
    """
    domains = ['www.dmoz.org']
    s = Silk(self.io_loop, allowed_domains=domains, fail_silent=False)

    s.get('http://www.dmoz.org', self.stop)
    response = self.wait()
    self.assertIn("dmoz", response.body)

    # assertRaises fails the test when nothing is raised. A bare try/except
    # would let a silently-succeeding fetch pass unnoticed.
    with self.assertRaises(ExternalDomainError):
        s.get('http://google.com', self.stop)
        self.wait()
def test_subdomain(self):
    """Allow only www.google.com and compare the bare and www hosts.

    A fetch of google.com must yield an empty body; a fetch of
    www.google.com must yield a body containing 'google'.
    """
    allowed = ['www.google.com']

    bare_client = Silk(self.io_loop, allowed_domains=allowed)
    bare_client.get('http://google.com', self.stop)
    bare_reply = self.wait()
    self.assertEqual(len(bare_reply.body), 0)

    # A fresh Silk instance is built for the second request, as before.
    www_client = Silk(self.io_loop, allowed_domains=allowed)
    www_client.get('http://www.google.com', self.stop)
    www_reply = self.wait()
    self.assertIn('google', www_reply.body)
def test_get(self):
    """Fetch index.html through Silk and check the body for the paragraph text."""
    client = Silk(self.io_loop)
    page_url = LOCAL_URL % (LOCAL_PORT, 'index.html')
    client.get(page_url, self.stop)
    page = self.wait()
    self.assertIn("Test paragraph", page.body)