push_url.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Command-line helper that pushes start URLs onto a spider's Redis queue."""
import json
import sys

import redis
from scrapy.spiderloader import SpiderLoader

from scraper.daemon import get_scraper_settings


class UrlPusher(object):
    def __init__(self):
        settings = get_scraper_settings()
        loader = SpiderLoader.from_settings(settings)
        self.spider_names = loader.list()
        self.server = redis.StrictRedis(host=settings.get('REDIS_HOST'),
                                        port=settings.get('REDIS_PORT'))

    def has_spider(self, spider_name):
        return spider_name in self.spider_names

    def push(self, spider_name, *urls):
        # Each spider consumes from the sorted set '<spider_name>:queue';
        # every URL is stored as a JSON object with score 0.
        key = spider_name + ':queue'
        for url in urls:
            data = json.dumps({'url': url})
            # redis-py >= 3.0 takes a {member: score} mapping; older
            # versions used the keyword form zadd(key, **{data: 0}).
            self.server.zadd(key, {data: 0})


if __name__ == '__main__':
    argv = sys.argv[1:]
    if len(argv) < 2:
        print('Usage: python push_url.py [spider_name] "[urls]"...')
        print('')
        print('Example:')
        print('  python push_url.py spider "www.google.com" "www.facebook.com"')
        sys.exit(0)
    else:
        spider_name = argv[0]
        pusher = UrlPusher()
        if pusher.has_spider(spider_name):
            pusher.push(spider_name, *argv[1:])
            sys.exit(0)
        else:
            print("Spider: [%s] doesn't exist!" % spider_name)
            sys.exit(1)