Example #1
            response = urllib2.urlopen(req)
            data = response.read()
            return data
        except Exception as error:
            raise Exception("Error downloading %s:" % (url, error))


class AwesomeWikipediaTitleCrawler(Crawler):
    """Crawler that extracts the page title of an English Wikipedia article.

    Registers one URL pattern whose ``name`` group captures the article slug,
    and downloads pages with a custom User-Agent header.
    """
    urls = [
        # Raw string avoids the deprecated '\:' / '\/' escape sequences, and
        # the dots in the hostname are escaped so the pattern only matches
        # "en.wikipedia.org" literally (a bare '.' matched any character).
        ('get_title', r'(?P<url>http://en\.wikipedia\.org/wiki/(?P<name>.*))', )
    ]
    downloader = 'DefaultDownloaderWithCustomUserAgent'
    # Downloader options with custom user agent.
    downloader_options = {
        'headers': {'User-agent': 'Firefox'}
    }

    def action_get_title(self, data, **kwargs):
        """Parse the downloaded HTML and return the article title.

        :param data: raw HTML of the article page.
        :returns: ``{'title': <text>}`` on success, ``None`` when the page
            cannot be parsed or the heading span is missing.
        """
        try:
            document = document_fromstring(data)
            selector = CSSSelector('h1.firstHeading > span')
            return {'title': selector(document)[0].text}
        except Exception as e:
            # Best-effort: report the parse failure but do not propagate it.
            # print(e) is valid in both Python 2 and 3 for a single argument.
            print(e)

# Wire everything together: create the engine, register the custom
# downloader and the Wikipedia crawler, then crawl one article URL.
crawler = datCrawl()
crawler.register_downloader(DefaultDownloaderWithCustomUserAgent)
crawler.register_crawler(AwesomeWikipediaTitleCrawler)
print crawler.run("http://en.wikipedia.org/wiki/Python_(programming_language)")
# returns {'title': 'Python (programming language)'}
Example #2
 def test_instance_check(self):
     """datCrawl() must produce an instance of the datCrawl class."""
     instance = datCrawl()
     self.assertTrue(isinstance(instance, datCrawl))
Example #3
 def test_running_full_crawler(self):
     """Running the registered crawler against URL yields the page title."""
     engine = datCrawl()
     engine.register_crawler(AwesomeWikipediaTitleCrawler)
     self.assertEqual(engine.run(URL)['title'], 'Python')
Example #4
 def test_worker_instance(self):
     """worker() returns a datCrawlWorker bound to the requested URL."""
     engine = datCrawl()
     engine.register_crawler(AwesomeWikipediaTitleCrawler)
     created = engine.worker(URL)
     self.assertTrue(isinstance(created, datCrawlWorker))
     self.assertEqual(URL, created.url)
Example #5
 def test_register_urls(self):
     """register_url stores the (action, pattern, crawler) triple verbatim."""
     engine = datCrawl()
     entry = ('action', 'http://www.google.es/', 'AwesomeGoogleCrawler')
     engine.register_url(*entry)
     self.assertEqual(engine.urls[0], entry)