Example #1
    def webconsole_control(self, wc_request):
        """Handle control actions submitted from the web console.

        Parameters:
            wc_request: incoming web request whose ``args`` dict may contain
                any of ``stop_running_domains``, ``remove_pending_domains``,
                ``add_pending_domains`` or ``rerun_finished_domains``, each
                mapping to a list of domain names.

        Returns:
            An HTML fragment summarizing the actions that were performed.
        """
        args = wc_request.args
        s = "<hr />\n"

        if "stop_running_domains" in args:
            s += "<p>"
            stopped_domains = []
            for domain in args["stop_running_domains"]:
                # Only domains with a currently running spider can be stopped.
                if domain in self.running:
                    scrapyengine.close_spider(self.running[domain])
                    stopped_domains.append(domain)
            s += "Stopped spiders: <ul><li>%s</li></ul>" % "</li><li>".join(stopped_domains)
            s += "</p>"
        if "remove_pending_domains" in args:
            removed = []
            for domain in args["remove_pending_domains"]:
                if scrapyengine.spider_scheduler.remove_pending_domain(domain):
                    removed.append(domain)
            if removed:
                s += "<p>"
                # BUG FIX: report only the domains that were actually removed
                # (the original joined the full requested list, misreporting
                # domains the scheduler declined to remove).
                s += "Removed scheduled spiders: <ul><li>%s</li></ul>" % "</li><li>".join(removed)
                s += "</p>"
        if "add_pending_domains" in args:
            for domain in args["add_pending_domains"]:
                # Skip domains that are already pending in the scheduler.
                if domain not in scrapyengine.scheduler.pending_requests:
                    scrapymanager.crawl(domain)
            s += "<p>"
            s += "Scheduled spiders: <ul><li>%s</li></ul>" % "</li><li>".join(args["add_pending_domains"])
            s += "</p>"
        if "rerun_finished_domains" in args:
            for domain in args["rerun_finished_domains"]:
                if domain not in scrapyengine.scheduler.pending_requests:
                    scrapymanager.crawl(domain)
                # ROBUSTNESS FIX: guard the removal so re-running a domain
                # that is not in the finished collection cannot raise.
                if domain in self.finished:
                    self.finished.remove(domain)
            s += "<p>"
            s += "Re-scheduled finished spiders: <ul><li>%s</li></ul>" % "</li><li>".join(args["rerun_finished_domains"])
            s += "</p>"

        return s
Example #2
 def crawl(self, spider, task):
     """Run the given *task* on *spider*.

     Loads the task into the spider, then hands the spider's start URLs
     to the global crawl manager.
     """
     spider.load(task)
     start_urls = spider.start_urls
     scrapymanager.crawl(*start_urls)