Python Silk Examples

Programming Language: Python

Namespace/Package Name: models

Class/Type: Silk

Examples at hotexamples.com: 17

Python Silk - 17 examples found. These are the top rated real world Python examples of models.Silk extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

get(9)

register(3)

crawl(2)

delete_local_file(2)

get_local_file(2)

parse_url(2)

add_request(1)

fetch_and_save(1)

loop(1)

parse(1)

start(1)

Example #1

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_incorrect_parse_xpath(self):
     """Parsing a page with the invalid XPath '//count()' must raise XPathEvalError.

     A bare ``try/except ...: pass`` would let the test succeed even when no
     error is raised, so the expectation is stated with ``assertRaises``.
     """
     s = Silk(self.io_loop)
     s.parse_url('//count()', LOCAL_URL % (LOCAL_PORT, 'index.html'), self.stop)
     # The error is expected to surface while waiting for the callback.
     with self.assertRaises(XPathEvalError):
         self.wait()

Example #2

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_simplehttpserver(self):
     """Fetch an existing path and a missing page and check the status codes."""
     client = Silk(self.io_loop)
     # (path, expected HTTP status) pairs, requested in this order.
     expectations = (
         ('/', 200),
         ('thisdoesnotexist.html', 404),
     )
     for path, expected_code in expectations:
         client.get(LOCAL_URL % (LOCAL_PORT, path), self.stop)
         reply = self.wait()
         self.assertEqual(reply.code, expected_code)

Example #3

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_can_register_spiders(self):
     """Register two spiders and check both appear in ``Silk.spiders``."""
     registered = [Spider(), Spider()]
     silk = Silk(self.io_loop)
     for spider in registered:
         silk.register(spider)
     # Assertions run only after every spider has been registered.
     for spider in registered:
         self.assertIn(spider, silk.spiders)

Example #4

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test__find_urls(self):
     """Run ``Spider._find_urls`` on a fetched page and check the reported links."""
     fetcher = Silk(self.io_loop, allowed_domains=['www.dmoz.org'], fail_silent=False)
     page_url = LOCAL_URL % (LOCAL_PORT, 'index.html')
     fetcher.get(page_url, self.stop)
     page = self.wait()

     Spider()._find_urls(page, self.stop)
     found = self.wait()
     # NOTE(review): this asserts that the two-item list is itself a member of
     # the result, not that each URL is -- confirm that is the intended shape.
     expected = ['http://www.google.com', 'page1.html']
     self.assertIn(expected, found)

Example #5

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_parse(self):
     """Fetch the index page, evaluate '//text()' on it and inspect the result.

     The result must be a list whose joined contents include the word 'test'.
     """
     s = Silk(self.io_loop)
     s.get(LOCAL_URL % (LOCAL_PORT, 'index.html'), self.stop)
     response = self.wait()
     s.parse('//text()', response, self.stop)
     xpath_elements = self.wait()
     # Check the type directly: type(<comparison>) is always truthy and would
     # make the assertion pass for any value.
     self.assertIsInstance(xpath_elements, list)
     text_string = ''.join(xpath_elements)
     self.assertIn('test', text_string)

Example #6

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_spider_prints_urls_without_callback(self):
     """Crawl with a registered spider that has no callback.

     No assertions are made; the test only waits for the crawl callback.
     """
     spider1 = Spider(['Python', 'Ruby'], ['Deutsch'], callback=None)
     crawler = Silk(
         self.io_loop,
         allowed_domains=['www.dmoz.org'],
         fail_silent=False,
     )
     crawler.register(spider1)
     start_url = 'http://www.dmoz.org/Computers/Programming/Languages/Python/Books/'
     crawler.crawl(start_url, self.stop)
     # The value handed to the callback is not inspected.
     self.wait()

Example #7

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_domains_single_domain(self):
     """Allow only the local server; a request to another host yields an empty body."""
     local_only = ['127.0.0.1:%s' % LOCAL_PORT]
     silk = Silk(self.io_loop, allowed_domains=local_only)

     # Request to the allowed local host.
     silk.get(LOCAL_URL % (LOCAL_PORT, 'index.html'), self.stop)
     local_reply = self.wait()
     self.assertIn("test paragraph", local_reply.body)

     # Request to a host outside the allowed list: silently fails and
     # returns an empty body.
     silk.get('http://google.com', self.stop)
     external_reply = self.wait()
     self.assertEqual(external_reply.body, '')

Example #8

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_add_requests(self):
     """Queue the same URL twice via ``add_request`` and check each response body."""
     silk = Silk(self.io_loop, allowed_domains=['www.dmoz.org'], fail_silent=False)
     target = 'http://www.dmoz.org/Computers/Programming/Languages/Python/Books/'
     # Two identical request/wait/assert rounds against the same URL.
     for _ in range(2):
         silk.add_request(target, self.stop)
         reply = self.wait()
         self.assertIn('dmoz', reply.body)

Example #9

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_multiple_domains(self):
     """With two allowed domains, fetch each one and check its body text."""
     s3 = Silk(self.io_loop, allowed_domains=['www.dmoz.org', 'www.google.com'])
     # (url, text expected in the response body), fetched in this order.
     cases = (
         ('http://www.dmoz.org', "dmoz"),
         ('http://www.google.com', "Google"),
     )
     for url, needle in cases:
         s3.get(url, self.stop)
         reply = self.wait()
         self.assertIn(needle, reply.body)

Example #10

0

Show file

File: tests.py Project: mrmagooey/gossamer

    def test_domains_fail_loudly(self):
        """With fail_silent=False, a request outside allowed_domains must raise.

        The allowed domain is fetched first and its body checked; the request
        to the external host is then required to raise ExternalDomainError.
        """
        domains = [
            'www.dmoz.org',
        ]

        s = Silk(self.io_loop, allowed_domains=domains, fail_silent=False)
        s.get('http://www.dmoz.org', self.stop)
        response = self.wait()
        self.assertIn("dmoz", response.body)
        # assertRaises fails the test when nothing is raised; a try/except
        # around these calls would pass in that case.
        with self.assertRaises(ExternalDomainError):
            s.get('http://google.com', self.stop)
            self.wait()

Example #11

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_local_file_storage(self):
     """Save a fetched page, read it back via ``get_local_file`` and compare bodies."""
     url = LOCAL_URL % (LOCAL_PORT, 'index.html')
     silk = Silk(self.io_loop)

     silk.fetch_and_save(url, self.stop)
     fetched = self.wait()

     silk.get_local_file(url, self.stop)
     stored = self.wait()

     self.assertEqual(fetched.body, stored.body)
     # Remove the local copy for this URL once the comparison is done.
     silk.delete_local_file(url)

Example #12

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_debug_setting(self):
     """
     With debug=True, a page fetched through ``get`` should also be retrievable
     through ``get_local_file`` with an identical body.
     """
     url = LOCAL_URL % (LOCAL_PORT, 'index.html')
     silk = Silk(self.io_loop, debug=True)

     silk.get(url, self.stop)
     live = self.wait()

     silk.get_local_file(url, self.stop)
     cached = self.wait()

     self.assertEqual(live.body, cached.body)
     # Remove the local copy for this URL once the comparison is done.
     silk.delete_local_file(url)

Example #13

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_subdomain(self):
     """Only 'www.google.com' is allowed: the bare domain gives an empty body."""
     allowed = ['www.google.com']

     # 'google.com' is not in the allowed list, so the body is expected empty.
     bare = Silk(self.io_loop, allowed_domains=allowed)
     bare.get('http://google.com', self.stop)
     bare_reply = self.wait()
     self.assertEqual(len(bare_reply.body), 0)

     # A fresh instance with the same list fetches the allowed host itself.
     exact = Silk(self.io_loop, allowed_domains=allowed)
     exact.get('http://www.google.com', self.stop)
     exact_reply = self.wait()
     self.assertIn('google', exact_reply.body)

Example #14

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test__crawl(self):
     """Register a default Spider and start a crawl of the local index page."""
     default_spider = Spider()
     crawler = Silk(self.io_loop, allowed_domains=[''])
     crawler.register(default_spider)
     start_url = LOCAL_URL % (LOCAL_PORT, 'index.html')
     # NOTE(review): no self.wait() follows, so the crawl result is never
     # observed by this test.
     crawler.crawl(start_url, self.stop)

Example #15

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_start(self):
     """Build a Silk without arguments, attach the global IOLoop and start it."""
     silk = Silk()
     # The loop is assigned after construction rather than passed to Silk().
     silk.loop = IOLoop.instance()
     silk.start()

Example #16

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_parse_url(self):
     """Evaluate '//text()' on the index page via ``parse_url``; expect a list."""
     s = Silk(self.io_loop)
     s.parse_url('//text()', LOCAL_URL % (LOCAL_PORT, 'index.html'), self.stop)
     xpath_elements = self.wait()
     # Check the type directly: type(<comparison>) is always truthy and would
     # make the assertion pass for any value.
     self.assertIsInstance(xpath_elements, list)

Example #17

0

Show file

File: tests.py Project: mrmagooey/gossamer

 def test_get(self):
     """Fetch the local index page and check its body for 'Test paragraph'."""
     index_url = LOCAL_URL % (LOCAL_PORT, 'index.html')
     silk = Silk(self.io_loop)
     silk.get(index_url, self.stop)
     page = self.wait()
     self.assertIn("Test paragraph", page.body)