-
Notifications
You must be signed in to change notification settings - Fork 2
/
testunit.py
executable file
·52 lines (40 loc) · 1.35 KB
/
testunit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python
# coding: utf-8
import sys
from optparse import OptionParser
from twisted.internet import reactor
from twisted.web import client
from scrapy.http import HtmlResponse
from yowa.utils import ParserManager, urltools
class Crawl:
    """Fetch one page through Twisted and wrap the body in an HtmlResponse.

    Usage: construct with a URL, call ``fetch()``, then read ``response``
    (``None`` if the download failed, in which case ``error`` holds the
    Twisted failure object).
    """

    def __init__(self, url):
        """Remember *url*; nothing is downloaded until ``fetch()`` is called."""
        self.url = url
        # HtmlResponse built from the downloaded body; None until a fetch
        # has succeeded.
        self.response = None
        # Failure passed to the errback when the download (or the response
        # construction) went wrong; None otherwise.
        self.error = None

    def result(self, body):
        """Success callback: store *body* as an HtmlResponse and stop the reactor."""
        self.response = HtmlResponse(url=self.url, body=body)
        reactor.stop()

    def _failed(self, failure):
        """Errback: record *failure* and stop the reactor.

        Without an errback a failed request would leave nothing to call
        ``reactor.stop()``, so ``fetch()`` would block forever.
        """
        self.error = failure
        reactor.stop()

    def fetch(self):
        """Download ``self.url`` and block until the request has finished.

        On success ``self.response`` is populated; on failure it stays
        ``None`` and ``self.error`` is set.

        NOTE(review): this starts the global Twisted reactor, which as far
        as I know cannot be restarted once stopped -- so this is expected
        to be called once per process; confirm before reusing elsewhere.
        """
        deferred = client.getPage(self.url)
        deferred.addCallback(self.result)
        # Registered after the callback so that an exception raised inside
        # result() (before it reaches reactor.stop()) is handled here too.
        deferred.addErrback(self._failed)
        reactor.run()
def run():
    """Command-line entry point: fetch a URL and print what a template extracts.

    Requires both ``-t/--tpl`` (template name) and ``-u/--url`` (page to
    fetch). The page is downloaded with ``Crawl``; a ``ParserManager`` built
    from ``urltools.get_domain(url)`` creates the parser named by the
    template, and each key/value pair of the extracted item is printed as
    ``[key]: value``.

    Exits with status 1 (message on stderr) when an option is missing or
    when the download produced no response.
    """
    parser = OptionParser()
    parser.add_option("-t", "--tpl", dest="template_name",
                      help="specified a template")
    parser.add_option("-u", "--url", dest="request_url",
                      help="specified a request url as 'http://www.baidu.com/'")
    # Positional arguments are not used by this tool.
    options, _ = parser.parse_args()

    tpl = options.template_name
    url = options.request_url
    if not tpl or not url:
        sys.stderr.write("Type './testunit.py --help' for usage.\n")
        sys.exit(1)

    spider = Crawl(url)
    spider.fetch()
    # Crawl.response starts out as None; refuse to hand a missing response
    # to the parser, where it would only fail with a less helpful error.
    if spider.response is None:
        sys.stderr.write("Failed to fetch '%s'.\n" % url)
        sys.exit(1)

    pm = ParserManager(urltools.get_domain(url))
    p = pm.create(tpl, response=spider.response)
    item = p.extract()
    # withdict() presumably returns a plain dict view of the item -- each
    # (key, value) tuple fills the two %s placeholders.
    for kv in item.withdict().items():
        # Single-argument parenthesised print: prints the same text under
        # Python 2 and is also valid syntax on Python 3.
        print(u'[%s]: %s' % kv)
# Run the command-line tool only when executed as a script, not on import.
if "__main__" == __name__:
    run()