예제 #1
0
def gen_spider(spider, domain):
    """Generate a new spider module from the ``utils/spider.tpl`` template.

    The template text is filled in with %-style named substitution (keys
    ``Spider``, ``spider`` and ``domain``) and written to
    ``spiders/<spider>.py``. Both paths are relative.

    Args:
        spider: Name of the spider to create; also used as the file stem.
            Its ``title()``-cased form is substituted for ``Spider``.
        domain: Value substituted for the template's ``domain`` key.

    Raises:
        Exception: If ``spider`` is already in ``SpiderManager().get_list()``.
    """
    # Refuse to proceed before touching any file.
    if spider in SpiderManager().get_list():
        raise Exception(u"Spider %s exists." % spider)

    with open("utils/spider.tpl") as template:
        content = template.read()
    content %= {'Spider': spider.title(), 'spider': spider, 'domain': domain}

    default_filename = "spiders/%s.py" % spider
    with open(default_filename, "w") as f:
        f.write(content)

    # Single-argument call form: on Python 2 the parentheses merely group the
    # expression, so the output is unchanged, and it also parses on Python 3.
    print(u"Spider %s has been create into %s." % (spider, default_filename))
예제 #2
0
    def install(self, options):
        """Initialise this object's crawl state from parsed options.

        Reads ``options.spider``, ``options.level``, ``options.forever`` and
        ``options.urls``. ``self.redis`` is read here but not assigned, so it
        must already be set before this method is called.

        Args:
            options: Object exposing the attributes listed above.
                ``options.urls`` is a comma-separated string or a falsy value.
        """
        self.event = Event()
        self.spider_class = SpiderManager().create(options.spider)
        spider_name = self.spider_class.name

        self.log = Logger().getlog(level=logging.getLevelName(options.level))
        # Two queues keyed by spider name, both backed by self.redis.
        self.spider_queue = Queue("spider:q:%s" % spider_name, self.redis)
        self.schedule_queue = Queue("schedule:q:%s" % spider_name, self.redis)
        self.pipeline_queue = queue.Queue()

        # Built from `self`, so it stays after the attributes assigned above.
        self.fp = Dupefilter.from_crawler(self)
        self.fp.open(self.spider_class.name)

        self.forever = options.forever
        self.running = False
        if options.urls:
            # Strip surrounding whitespace and drop empty entries so inputs
            # like "a, b" or "a,,b," do not yield blank or padded URLs.
            candidates = (url.strip() for url in options.urls.split(','))
            self.addition_urls = [url for url in candidates if url]
        else:
            self.addition_urls = []
        # Created last because it receives the fully populated `self`.
        self.schedule = Schedule(self)
예제 #3
0
def list_spiders():
    """Print the names from ``SpiderManager().get_list()`` on one line.

    The names are joined with ``', '``.
    """
    names = SpiderManager().get_list()
    # Single-argument call form: same output on Python 2, valid on Python 3.
    print(', '.join(names))