Ejemplo n.º 1
0
    def run(self, args, opts):
        """Download one URL and print what the spider's callbacks extract from it.

        The callback used to process the response is chosen in this order:
        every entry of ``self.callbacks`` if any are set; otherwise, when
        ``opts.rules`` is truthy, the callback of the first spider rule whose
        link extractor matches the response URL; otherwise ``"parse"``.

        Args:
            args: positional command arguments; must be exactly one value
                accepted by ``is_url``.
            opts: parsed options. ``opts.spider`` and ``opts.rules`` are read
                here; the whole object is also forwarded to ``run_callback``
                and ``print_results``.

        Returns:
            ``False`` when ``args`` is not exactly one URL (presumably treated
            as a usage error by the caller -- confirm against the command
            runner). ``None`` in every other case; failures on those paths are
            reported through ``log.msg`` rather than raised.
        """
        if len(args) != 1 or not is_url(args[0]):
            return False

        # The request's callback is ``responses.append``, so whatever gets
        # passed to the callback is collected in this list.
        responses = []
        request = Request(args[0], callback=responses.append)

        if opts.spider:
            try:
                spider = spiders.create(opts.spider)
            except KeyError:
                log.msg("Unable to find spider: %s" % opts.spider, log.ERROR)
                return
        else:
            spider = spiders.create_for_request(request)

        scrapymanager.configure()
        scrapymanager.queue.append_request(request, spider)
        scrapymanager.start()

        if not responses:
            log.msg("No response returned", log.ERROR, spider=spider)
            return

        # Only the first collected response is processed.
        response = responses[0]

        if self.callbacks:
            # Explicit callbacks take precedence: apply each one in turn.
            for callback in self.callbacks:
                items, links = self.run_callback(spider, response, callback, args, opts)
                self.print_results(items, links, callback, opts)
        elif opts.rules:
            rules = getattr(spider, "rules", None)
            if rules:
                for rule in rules:
                    # Rules without a callback are skipped even if they match.
                    if rule.link_extractor.matches(response.url) and rule.callback:
                        items, links = self.run_callback(spider, response, rule.callback, args, opts)
                        self.print_results(items, links, rule.callback, opts)
                        # First matching rule wins.
                        break
                else:
                    # Previously this case fell through silently, leaving the
                    # user with no output and no hint about why.
                    log.msg(
                        'No rule with a callback matches "%s" in spider "%s", '
                        "please specify a callback for parsing" % (response.url, spider.name),
                        log.ERROR,
                        spider=spider,
                    )
            else:
                log.msg(
                    'No rules found for spider "%s", ' "please specify a callback for parsing" % spider.name, log.ERROR
                )
        else:
            # Default: use the spider's 'parse' callback.
            items, links = self.run_callback(spider, response, "parse", args, opts)
            self.print_results(items, links, "parse", opts)
Ejemplo n.º 2
0
    def fetch(self, request_or_url, print_help=False):
        """Download a request or URL and hand the result to ``populate_vars``.

        Args:
            request_or_url: either a ``Request`` instance, used as-is, or a
                value that is passed through ``parse_url`` and wrapped in a
                new ``Request``.
            print_help: when true, call ``self.print_help()`` after a
                successful fetch instead of printing the short "Done" hint.

        Returns:
            None. When the scheduled call yields a falsy response nothing is
            populated and nothing further is printed.
        """
        if isinstance(request_or_url, Request):
            request = request_or_url
            url = request.url
        else:
            url = parse_url(request_or_url)
            request = Request(url)

        spider = spiders.create_for_request(
            request, BaseSpider('default'), log_multiple=True)

        # Parenthesised single-argument print emits the same text on Python 2
        # and is also valid syntax on Python 3.
        print("Fetching %s..." % request)
        scrapymanager.engine.open_spider(spider)
        # NOTE(review): presumably Twisted's blockingCallFromThread, i.e. this
        # method is expected to run outside the reactor thread -- confirm.
        response = threads.blockingCallFromThread(
            reactor, scrapymanager.engine.schedule, request, spider)
        if response:
            self.populate_vars(url, response, request, spider)
            if print_help:
                self.print_help()
            else:
                print("Done - use shelp() to see available objects")