Ejemplo n.º 1
0
 def _process_spidermw_output(self, output, request, response, spider):
     """Handle one value (``output``) produced by the spider.

     Requests are announced with ``request_received`` and passed to
     ``self.engine.crawl``. Items are logged, announced with
     ``item_scraped`` and given to ``self.itemproc``; the resulting
     deferred is returned. ``None`` is ignored and any other value is
     logged as an error. Nothing happens when the spider is listed in
     ``self.engine.closing``.
     """
     # TODO: keep closing state internally instead of checking engine
     if spider in self.engine.closing:
         return

     if isinstance(output, Request):
         send_catch_log(signal=signals.request_received, request=output,
             spider=spider)
         self.engine.crawl(request=output, spider=spider)
         return

     if isinstance(output, BaseItem):
         scraped_msg = "Scraped %s in <%s>" % (output, request.url)
         log.msg(scraped_msg, level=log.DEBUG, spider=spider)
         send_catch_log(signal=signals.item_scraped, sender=self.__class__,
             item=output, spider=spider, response=response)
         self.sites[spider].itemproc_size += 1
         # FIXME: this can't be called here because the stats spider may be
         # already closed
         #stats.max_value('scraper/max_itemproc_size', \
         #        self.sites[spider].itemproc_size, spider=spider)
         item_dfd = self.itemproc.process_item(output, spider)
         item_dfd.addBoth(self._itemproc_finished, output, spider)
         return item_dfd

     if output is not None:
         # neither a Request, a BaseItem nor None
         log.msg("Spider must return Request, BaseItem or None, got %r in %s" %
             (type(output).__name__, request), log.ERROR, spider=spider)
Ejemplo n.º 2
0
 def _on_success(response):
     """Log and signal a received ``Response``; return the input unchanged.

     ``request`` and ``spider`` are free variables resolved outside this
     function. A ``Request`` result is returned without logging.
     """
     assert isinstance(response, (Response, Request))
     if not isinstance(response, Response):
         return response
     response.request = request  # tie request to response received
     crawled_msg = log.formatter.crawled(request, response, spider)
     log.msg(crawled_msg, level=log.DEBUG, spider=spider)
     send_catch_log(signal=signals.response_received, response=response,
         request=request, spider=spider)
     return response
Ejemplo n.º 3
0
 def close_spider(self, spider, reason):
     """Drop the stats kept for ``spider``, signalling before and after.

     The removed stats are optionally dumped to the log (``self._dump``)
     and then handed to ``self._persist_stats``.
     """
     send_catch_log(stats_spider_closing, spider=spider, reason=reason)
     spider_stats = self._stats.pop(spider)
     send_catch_log(stats_spider_closed, spider=spider, reason=reason,
         spider_stats=spider_stats)
     if self._dump:
         dump_msg = "Dumping spider stats:\n" + pprint.pformat(spider_stats)
         log.msg(dump_msg, spider=spider)
     self._persist_stats(spider_stats, spider)
Ejemplo n.º 4
0
 def handle_spider_error(self, _failure, request, response, spider):
     """React to a failure raised while the spider processed ``request``.

     A ``CloseSpider`` exception closes the spider through the engine
     (reason defaults to "cancelled"). Any other failure is logged,
     announced with ``spider_error`` and counted in the stats.
     """
     error = _failure.value
     if isinstance(error, CloseSpider):
         close_reason = error.reason or "cancelled"
         self.crawler.engine.close_spider(spider, close_reason)
         return

     log.err(_failure, "Spider error processing %s" % request, spider=spider)
     send_catch_log(
         signal=signals.spider_error,
         failure=_failure,
         response=response,
         spider=spider,
     )
     stat_key = "spider_exceptions/%s" % _failure.value.__class__.__name__
     stats.inc_value(stat_key, spider=spider)
Ejemplo n.º 5
0
 def handle_spider_error(self, _failure, request, response, spider, propagated_failure=None):
     """Log a spider failure, emit ``spider_error`` and count it in stats.

     The log message includes the request URL and its ``Referer`` header.
     ``propagated_failure`` is accepted but not used in this body.
     """
     referer = request.headers.get('Referer', None)
     error_msg = "Spider error processing <%s> (referer: <%s>)" % (
         request.url, referer)
     log.err(_failure, error_msg, spider=spider)
     send_catch_log(signal=signals.spider_error, failure=_failure,
         response=response, spider=spider)
     stat_key = "spider_exceptions/%s" % _failure.value.__class__.__name__
     stats.inc_value(stat_key, spider=spider)
Ejemplo n.º 6
0
    def open_spider(self, spider):
        """Open ``spider`` in the downloader, scraper and stats.

        Logs the opening, triggers ``self.next_request`` first and emits
        ``spider_opened`` once every component has been opened.
        """
        log.msg("Spider opened", spider=spider)
        self.next_request(spider)

        # open the spider in each component, in this order
        self.downloader.open_spider(spider)
        self.scraper.open_spider(spider)
        stats.open_spider(spider)

        send_catch_log(
            signals.spider_opened,
            sender=self.__class__,
            spider=spider,
        )
Ejemplo n.º 7
0
 def close_spider(self, spider, reason):
     """Remove the stats of ``spider`` and persist them.

     ``stats_spider_closing`` is sent before the stats are removed and
     ``stats_spider_closed`` (carrying the stats) afterwards. When
     ``self._dump`` is set the stats are also written to the log.
     """
     send_catch_log(stats_spider_closing, spider=spider, reason=reason)
     closed_stats = self._stats.pop(spider)
     send_catch_log(
         stats_spider_closed,
         spider=spider,
         reason=reason,
         spider_stats=closed_stats,
     )
     if self._dump:
         formatted = pprint.pformat(closed_stats)
         log.msg("Dumping spider stats:\n" + formatted, spider=spider)
     self._persist_stats(closed_stats, spider)
Ejemplo n.º 8
0
 def _on_success(response):
     """Pass the download result through, logging/signalling real responses.

     ``request`` and ``spider`` are free variables resolved outside this
     function.
     """
     assert isinstance(response, (Response, Request))
     got_response = isinstance(response, Response)
     if got_response:
         # tie request to response received
         response.request = request
         message = log.formatter.crawled(request, response, spider)
         log.msg(message, level=log.DEBUG, spider=spider)
         send_catch_log(
             signal=signals.response_received,
             response=response,
             request=request,
             spider=spider,
         )
     return response
Ejemplo n.º 9
0
 def start(self):
     """Start the execution engine.

     Does nothing when the engine is already running. Otherwise records
     the start time, emits ``engine_started``, starts the main-loop task
     with a 5.0 interval and marks the engine as running.
     """
     if not self.running:
         self.start_time = time()
         send_catch_log(signal=signals.engine_started,
             sender=self.__class__)
         self._mainloop_task.start(5.0, now=True)
         reactor.callWhenRunning(self._mainloop)
         self.running = True
Ejemplo n.º 10
0
 def test_error_logged_if_deferred_not_supported(self):
     """A handler that returns a Deferred must produce exactly one log
     record mentioning that deferreds are not supported.
     """
     test_signal = object()
     test_handler = lambda: defer.Deferred()
     dispatcher.connect(test_handler, test_signal)
     try:
         with LogCapture() as log_capture:
             send_catch_log(test_signal)
         self.assertEqual(len(log_capture.records), 1)
         self.assertIn("Cannot return deferreds from signal handler",
             str(log_capture))
     finally:
         # Disconnect even when an assertion fails, so the handler does
         # not stay registered for the following tests.
         dispatcher.disconnect(test_handler, test_signal)
Ejemplo n.º 11
0
 def test_error_logged_if_deferred_not_supported(self):
     """Sending a signal to a handler that returns a Deferred must log a
     single "Cannot return deferreds" record.
     """
     test_signal = object()
     test_handler = lambda: defer.Deferred()
     dispatcher.connect(test_handler, test_signal)
     try:
         with LogCapture() as captured:
             send_catch_log(test_signal)
         self.assertEqual(len(captured.records), 1)
         self.assertIn("Cannot return deferreds from signal handler",
             str(captured))
     finally:
         # Cleanup must run on failure too; otherwise the handler leaks
         # into the dispatcher state seen by other tests.
         dispatcher.disconnect(test_handler, test_signal)
Ejemplo n.º 12
0
 def open_spider(self, spider):
     """Open ``spider`` in every engine component and start requesting.

     Asserts that a free slot exists, opens the spider in the scheduler,
     downloader, scraper and stats (in that order), emits
     ``spider_opened`` and finally calls ``self.next_request``.
     """
     assert self.has_capacity(), \
         "No free spider slots when opening %r" % spider.name
     log.msg("Spider opened", spider=spider)

     self.scheduler.open_spider(spider)
     self.downloader.open_spider(spider)
     self.scraper.open_spider(spider)
     stats.open_spider(spider)

     send_catch_log(
         signals.spider_opened,
         sender=self.__class__,
         spider=spider,
     )
     self.next_request(spider)
Ejemplo n.º 13
0
 def _on_success(response):
     """Handle the result of a page download.

     A ``Response`` is tied to its request, logged and announced with
     ``response_received``; a ``Request`` is returned untouched.
     ``request`` and ``spider`` are free variables resolved outside this
     function.
     """
     assert isinstance(response, (Response, Request))
     if not isinstance(response, Response):
         return response

     response.request = request  # tie request to response received
     log.msg(
         log.formatter.crawled(request, response, spider),
         level=log.DEBUG,
         spider=spider,
     )
     send_catch_log(signal=signals.response_received, response=response,
         request=request, spider=spider)
     return response
Ejemplo n.º 14
0
 def handle_spider_error(self, _failure, request, response, spider):
     """Handle a failure raised while the spider processed ``request``.

     ``CloseSpider`` asks the engine to close the spider (reason defaults
     to 'cancelled'); everything else is logged, signalled as
     ``spider_error`` and counted per exception class in the stats.
     """
     raised = _failure.value
     if isinstance(raised, CloseSpider):
         reason = raised.reason or 'cancelled'
         self.crawler.engine.close_spider(spider, reason)
         return

     log.err(_failure, "Spider error processing %s" % request, spider=spider)
     send_catch_log(signal=signals.spider_error, failure=_failure,
         response=response, spider=spider)
     counter_name = "spider_exceptions/%s" % _failure.value.__class__.__name__
     stats.inc_value(counter_name, spider=spider)
Ejemplo n.º 15
0
    def parse(self, response, basic_link_info, spider):
        """Run this parser on ``response`` when its condition permits.

        Returns ``ReturnStatus.move_on`` when ``conditon_permit`` rejects
        the response. Otherwise processes it, emits ``item_extracted``
        with the value returned by ``self.process()`` and returns
        ``ReturnStatus.stop_it``.
        """
        permitted = self.conditon_permit(response, basic_link_info, spider)
        if not permitted:
            return ReturnStatus.move_on

        self.log("use parser: %s" % type(self))
        self.init_context(response, basic_link_info, spider)
        extracted = self.process()
        send_catch_log(signal=signals.item_extracted,
                       url=self.response.url,
                       item_num=extracted)

        return ReturnStatus.stop_it
Ejemplo n.º 16
0
    def parse(self, response, basic_link_info, spider):
        """Run this link parser on ``response`` when its condition permits.

        Returns ``ReturnStatus.move_on`` when ``conditon_permit`` rejects
        the response. Otherwise processes it, emits ``link_extracted``
        with the value returned by ``self.process()`` and returns
        ``ReturnStatus.stop_it``.
        """
        allowed = self.conditon_permit(response, basic_link_info, spider)
        if not allowed:
            return ReturnStatus.move_on

        self.log("use parser: %s" % type(self))
        self.init_context(response, basic_link_info, spider)
        extracted_links = self.process()
        send_catch_log(signal=signals.link_extracted, url=self.response.url,
                       link_num=extracted_links)

        return ReturnStatus.stop_it
Ejemplo n.º 17
0
 def test_error_logged_if_deferred_not_supported(self):
     """A handler that returns a Deferred must cause a log event that
     mentions deferreds are not supported.
     """
     test_signal = object()
     test_handler = lambda: defer.Deferred()
     log_events = []
     observer = log_events.append
     txlog.addObserver(observer)
     try:
         dispatcher.connect(test_handler, test_signal)
         try:
             send_catch_log(test_signal)
             self.assertTrue(log_events)
             self.assertIn("Cannot return deferreds from signal handler",
                 str(log_events))
         finally:
             # Always disconnect so a failed assertion cannot leave the
             # handler registered.
             dispatcher.disconnect(test_handler, test_signal)
     finally:
         # Always detach the observer and flush the logged errors.
         txlog.removeObserver(observer)
         self.flushLoggedErrors()
Ejemplo n.º 18
0
 def test_error_logged_if_deferred_not_supported(self):
     """A handler that returns a Deferred must cause a log event that
     mentions deferreds are not supported.
     """
     test_signal = object()
     test_handler = lambda: defer.Deferred()
     log_events = []
     observer = log_events.append
     txlog.addObserver(observer)
     try:
         dispatcher.connect(test_handler, test_signal)
         try:
             send_catch_log(test_signal)
             # assertTrue replaces the deprecated failUnless alias
             self.assertTrue(log_events)
             self.assertTrue(
                 "Cannot return deferreds from signal handler" in str(log_events))
         finally:
             # Run cleanup even when an assertion fails, so the handler
             # does not leak into later tests.
             dispatcher.disconnect(test_handler, test_signal)
     finally:
         txlog.removeObserver(observer)
         self.flushLoggedErrors()
Ejemplo n.º 19
0
def execute(argv=None):
    """Parse ``argv`` and run the selected command.

    ``sys.argv`` is used when ``argv`` is None. The process exits with
    status 1 when the command requires a project but no settings module is
    set, with status 2 when no command or an unknown command is given, and
    after printing help when the help option was passed. Otherwise the
    ``command_executed`` signal is sent, the manager is configured and the
    command is run; help is printed when the command returns ``False``.
    """
    if argv is None:
        argv = sys.argv

    commands = _get_commands_dict()

    cmdname = _get_command_name(argv)
    _update_default_settings("scrapy.conf.commands", cmdname)
    _update_default_settings(settings["COMMANDS_SETTINGS_MODULE"], cmdname)

    help_formatter = optparse.TitledHelpFormatter()
    parser = optparse.OptionParser(formatter=help_formatter,
                                   conflict_handler="resolve",
                                   add_help_option=False)

    if cmdname in commands:
        cmd = commands[cmdname]
        cmd.add_options(parser)
        opts, args = parser.parse_args(args=argv[1:])
        cmd.process_options(args, opts)
        parser.usage = "%%prog %s %s" % (cmdname, cmd.syntax())
        parser.description = cmd.long_desc()
        project_missing = cmd.requires_project and not settings.settings_module
        if project_missing:
            print("Error running: scrapy-ctl.py %s\n" % cmdname)
            print("Cannot find project settings module in python path: %s" % settings.settings_module_path)
            sys.exit(1)
        if opts.help:
            parser.print_help()
            sys.exit()
    elif not cmdname:
        # no command given: parse the global options and show the usage
        cmd = ScrapyCommand()
        cmd.add_options(parser)
        opts, args = parser.parse_args(args=argv)
        cmd.process_options(args, opts)
        _print_usage(settings.settings_module)
        sys.exit(2)
    else:
        print("Unknown command: %s\n" % cmdname)
        print('Use "scrapy-ctl.py -h" for help')
        sys.exit(2)

    del args[0]  # remove command name from args
    send_catch_log(signal=command_executed, cmdname=cmdname, cmdobj=cmd,
                   args=args, opts=opts)
    from scrapy.core.manager import scrapymanager

    scrapymanager.configure(control_reactor=True)
    outcome = _run_command(cmd, args, opts)
    if outcome is False:
        parser.print_help()
Ejemplo n.º 20
0
    def send(self, to, subject, body, cc=None, attachs=()):
        """Build an e-mail message, emit ``mail_sent`` and deliver it.

        ``to`` and ``cc`` are sequences of addresses (lists or tuples).
        ``attachs`` is a sequence of ``(attach_name, mimetype, file)``
        triples; each file is read and attached base64-encoded.

        Returns None when ``self.debug`` is set (the message is only
        logged), otherwise the deferred returned by ``self._sendmail``.
        """
        if attachs:
            msg = MIMEMultipart()
        else:
            msg = MIMENonMultipart('text', 'plain')
        msg['From'] = self.mailfrom
        msg['To'] = COMMASPACE.join(to)
        msg['Date'] = formatdate(localtime=True)
        msg['Subject'] = subject
        # Copy into a real list: ``to[:]`` kept the input type, so a tuple
        # of recipients broke on ``extend`` as soon as ``cc`` was given.
        rcpts = list(to)
        if cc:
            rcpts.extend(cc)
            msg['Cc'] = COMMASPACE.join(cc)

        if attachs:
            msg.attach(MIMEText(body))
            for attach_name, mimetype, f in attachs:
                part = MIMEBase(*mimetype.split('/'))
                part.set_payload(f.read())
                Encoders.encode_base64(part)
                part.add_header('Content-Disposition',
                    'attachment; filename="%s"' % attach_name)
                msg.attach(part)
        else:
            msg.set_payload(body)

        send_catch_log(signal=mail_sent,
                       to=to,
                       subject=subject,
                       body=body,
                       cc=cc,
                       attach=attachs,
                       msg=msg)

        if self.debug:
            # debug mode: log the message instead of sending it
            log.msg('Debug mail sent OK: To=%s Cc=%s Subject="%s" Attachs=%d' %
                (to, cc, subject, len(attachs)), level=log.DEBUG)
            return

        dfd = self._sendmail(rcpts, msg.as_string())
        result_args = [to, cc, subject, len(attachs)]
        dfd.addCallbacks(self._sent_ok,
                         self._sent_failed,
                         callbackArgs=list(result_args),
                         errbackArgs=list(result_args))
        # returning the deferred from the trigger delays shutdown until
        # the delivery attempt has finished
        reactor.addSystemEventTrigger('before', 'shutdown', lambda: dfd)
        return dfd
Ejemplo n.º 21
0
 def _get_telnet_vars(self):
     """Build the dict of names exposed in the telnet console.

     The dict is passed to ``update_telnet_vars`` handlers (which receive
     it as ``telnet_vars``) before being returned.
     """
     # Note: if you add entries here also update topics/telnetconsole.rst
     telnet_vars = dict(
         engine=scrapymanager.engine,
         manager=scrapymanager,
         extensions=extensions,
         stats=stats,
         spiders=spiders,
         settings=settings,
         est=print_engine_status,
         p=pprint.pprint,
         prefs=print_live_refs,
         hpy=hpy,
     )
     send_catch_log(update_telnet_vars, telnet_vars=telnet_vars)
     return telnet_vars
Ejemplo n.º 22
0
    def test_send_catch_log(self):
        """``send_catch_log`` must call every handler, log the failing one
        and return one ``(handler, result)`` pair per handler.
        """
        handlers_called = set()

        def test_handler_error(arg):
            handlers_called.add(test_handler_error)
            1/0  # deliberately raises ZeroDivisionError

        def test_handler_check(arg):
            handlers_called.add(test_handler_check)
            assert arg == 'test'
            return "OK"

        def log_received(message, level):
            handlers_called.add(log_received)
            assert "test_handler_error" in message
            assert level == log.ERROR

        dispatcher.connect(log_received, signal=log.logmessage_received)
        dispatcher.connect(test_handler_error, signal=test_signal)
        dispatcher.connect(test_handler_check, signal=test_signal)
        try:
            result = send_catch_log(test_signal, arg='test')

            assert test_handler_error in handlers_called
            assert test_handler_check in handlers_called
            assert log_received in handlers_called
            self.assertEqual(result[0][0], test_handler_error)
            self.assertTrue(isinstance(result[0][1], Exception))
            self.assertEqual(result[1], (test_handler_check, "OK"))
        finally:
            # Disconnect even when an assertion fails so the handlers do
            # not stay registered for other tests.
            dispatcher.disconnect(log_received, signal=log.logmessage_received)
            dispatcher.disconnect(test_handler_error, signal=test_signal)
            dispatcher.disconnect(test_handler_check, signal=test_signal)
Ejemplo n.º 23
0
 def _finish_closing_spider(self, spider):
     """Finish closing ``spider``; called after the spider has been closed.

     Emits ``spider_closed``, cancels a still-active pending next-request
     call and chains the stats and spiders ``close_spider`` calls, the
     "Spider closed" log line and ``self._spider_closed_callback``.
     Returns the deferred that drives that chain.
     """
     reason = self.closing.pop(spider, 'finished')
     send_catch_log(signal=signals.spider_closed, sender=self.__class__,
         spider=spider, reason=reason)

     pending_call = self._next_request_calls.pop(spider, None)
     if pending_call and pending_call.active():
         pending_call.cancel()

     def _close_in_spiders(_):
         return spiders.close_spider(spider)

     def _log_closed(_):
         return log.msg("Spider closed (%s)" % reason, spider=spider)

     def _run_closed_callback(_):
         return self._spider_closed_callback(spider)

     closing_dfd = defer.maybeDeferred(stats.close_spider, spider,
         reason=reason)
     closing_dfd.addErrback(log.err,
         "Unhandled error in stats.close_spider()", spider=spider)
     closing_dfd.addBoth(_close_in_spiders)
     closing_dfd.addErrback(log.err,
         "Unhandled error in spiders.close_spider()", spider=spider)
     closing_dfd.addBoth(_log_closed)
     closing_dfd.addBoth(_run_closed_callback)
     return closing_dfd
Ejemplo n.º 24
0
        def process_response(response):
            """Run ``response`` through each response middleware method.

            A ``Request`` input is returned immediately. Processing stops
            as soon as a middleware returns a ``Request``. In both
            remaining cases ``response_received`` is emitted with the
            final object before it is returned.
            """
            assert response is not None, 'Received None in process_response'
            if isinstance(response, Request):
                return response

            for method in self.response_middleware:
                response = method(request=request, response=response, spider=spider)
                assert isinstance(response, (Response, Request)), \
                    'Middleware %s.process_response must return Response or Request, got %s' % \
                    (method.im_self.__class__.__name__, type(response))
                if isinstance(response, Request):
                    # a middleware returned a Request: skip the rest
                    break

            send_catch_log(signal=signals.response_received,
                sender=self.__class__, response=response, spider=spider)
            return response
Ejemplo n.º 25
0
 def _itemproc_finished(self, output, item, spider):
     """Handle the item processor's result (``output``) for ``item``.

     Decrements the spider's ``itemproc_size`` counter, then logs the
     outcome: passed items emit ``item_passed``, items rejected with
     ``DropItem`` emit ``item_dropped`` and other failures are logged as
     errors.
     """
     self.sites[spider].itemproc_size -= 1

     if not isinstance(output, Failure):
         log.msg("Passed %s" % item, log.INFO, spider=spider)
         send_catch_log(signal=signals.item_passed, sender=self.__class__,
             item=item, spider=spider, output=output)
         return

     ex = output.value
     if isinstance(ex, DropItem):
         log.msg("Dropped %s - %s" % (item, str(ex)), level=log.WARNING, spider=spider)
         send_catch_log(signal=signals.item_dropped, sender=self.__class__,
             item=item, spider=spider, exception=output.value)
     else:
         log.msg('Error processing %s - %s' % (item, output),
             log.ERROR, spider=spider)
Ejemplo n.º 26
0
 def _process_spidermw_output(self, output, request, response, spider):
     """Handle one value (``output``) produced by the spider.

     Requests are announced with ``request_received`` and passed to the
     crawler's engine. Items go to ``self.itemproc`` and the resulting
     deferred is returned. ``None`` is ignored; any other value is logged
     as an error.
     """
     if isinstance(output, Request):
         send_catch_log(signal=signals.request_received, request=output,
             spider=spider)
         self.crawler.engine.crawl(request=output, spider=spider)
         return

     if isinstance(output, BaseItem):
         self.slots[spider].itemproc_size += 1
         item_dfd = self.itemproc.process_item(output, spider)
         item_dfd.addBoth(self._itemproc_finished, output, response, spider)
         return item_dfd

     if output is not None:
         # neither a Request, a BaseItem nor None
         log.msg("Spider must return Request, BaseItem or None, got %r in %s" %
             (type(output).__name__, request), log.ERROR, spider=spider)
Ejemplo n.º 27
0
    def send_catch_log(self, signal, **kwargs):
        """
        Send ``signal``, catching and logging handler exceptions.

        All keyword arguments are forwarded to the signal handlers
        (connected through the :meth:`connect` method). ``sender``
        defaults to ``self.sender`` when the caller does not pass it.
        """
        default_sender = self.sender
        if 'sender' not in kwargs:
            kwargs['sender'] = default_sender
        return _signal.send_catch_log(signal, **kwargs)
Ejemplo n.º 28
0
 def _get_telnet_vars(self):
     """Build the dict of names exposed in the telnet console.

     The dict is passed to ``update_telnet_vars`` handlers (which receive
     it as ``telnet_vars``) before being returned.
     """
     # Note: if you add entries here also update topics/telnetconsole.rst
     help_text = "This is Scrapy telnet console. For more info see: " \
         "http://doc.scrapy.org/topics/telnetconsole.html"  # see #284
     telnet_vars = dict(
         engine=crawler.engine,
         manager=crawler,
         extensions=crawler.extensions,
         stats=stats,
         spiders=crawler.spiders,
         settings=settings,
         est=print_engine_status,
         p=pprint.pprint,
         prefs=print_live_refs,
         hpy=hpy,
         help=help_text,
     )
     send_catch_log(update_telnet_vars, telnet_vars=telnet_vars)
     return telnet_vars
Ejemplo n.º 29
0
    def send_catch_log(self, signal, **kwargs):
        """
        Dispatch ``signal`` and log any exception raised by a handler.

        Keyword arguments are handed to the signal handlers (connected
        through the :meth:`connect` method); ``sender`` is filled in from
        ``self.sender`` when it is not given.
        """
        fallback_sender = self.sender
        if 'sender' not in kwargs:
            kwargs['sender'] = fallback_sender
        result = _signal.send_catch_log(signal, **kwargs)
        return result
Ejemplo n.º 30
0
    def send(self, to, subject, body, cc=None, attachs=()):
        """Build an e-mail message, emit ``mail_sent`` and deliver it.

        ``to`` and ``cc`` are sequences of addresses (lists or tuples).
        ``attachs`` is a sequence of ``(attach_name, mimetype, file)``
        triples; each file is read and attached base64-encoded.

        Returns None when the ``MAIL_DEBUG`` setting is on (the message is
        only logged), otherwise the deferred returned by
        ``self._sendmail``.
        """
        if attachs:
            msg = MIMEMultipart()
        else:
            msg = MIMENonMultipart('text', 'plain')
        msg['From'] = self.mailfrom
        msg['To'] = COMMASPACE.join(to)
        msg['Date'] = formatdate(localtime=True)
        msg['Subject'] = subject
        # Copy into a real list: ``to[:]`` kept the input type, so a tuple
        # of recipients broke on ``extend`` as soon as ``cc`` was given.
        rcpts = list(to)
        if cc:
            rcpts.extend(cc)
            msg['Cc'] = COMMASPACE.join(cc)

        if attachs:
            msg.attach(MIMEText(body))
            for attach_name, mimetype, f in attachs:
                part = MIMEBase(*mimetype.split('/'))
                part.set_payload(f.read())
                Encoders.encode_base64(part)
                part.add_header('Content-Disposition',
                    'attachment; filename="%s"' % attach_name)
                msg.attach(part)
        else:
            msg.set_payload(body)

        send_catch_log(signal=mail_sent, to=to, subject=subject, body=body,
                       cc=cc, attach=attachs, msg=msg)

        if settings.getbool('MAIL_DEBUG'):
            # debug mode: log the message instead of sending it
            log.msg('Debug mail sent OK: To=%s Cc=%s Subject="%s" Attachs=%d' %
                (to, cc, subject, len(attachs)), level=log.DEBUG)
            return

        dfd = self._sendmail(rcpts, msg.as_string())
        dfd.addCallbacks(self._sent_ok, self._sent_failed,
            callbackArgs=[to, cc, subject, len(attachs)],
            errbackArgs=[to, cc, subject, len(attachs)])
        # returning the deferred from the trigger delays shutdown until
        # the delivery attempt has finished
        reactor.addSystemEventTrigger('before', 'shutdown', lambda: dfd)
        return dfd
Ejemplo n.º 31
0
    def put(self, url, name, cat, price, collection_name=None):
        """Record ``price`` for the item at ``url`` and emit ``item_saved``.

        An existing item gets the price added and its stored fields
        updated; when ``add_price`` reports nothing new, 'duplicate price'
        is logged and no signal is sent. An unknown URL is inserted as a
        new item dict.
        """
        uid = get_uid(url)
        domain = get_domain(url)
        crawl_time = int(time.time())
        item = self.get(url, collection_name)

        if not item:
            # first time this URL is seen: create its record
            item = {
                "url": url,
                'uid': uid,
                "name": name,
                "cat": cat,
                "data": [(price, crawl_time)],
                "bottom_price": (price, crawl_time),
                "domain": domain,
            }
            self.dbclient.insert(item, uid, collection_name)
        elif item.add_price(price, crawl_time):
            self.dbclient.update_field(uid, collection_name,
                data=item.data,
                bottom_price=item.bottom_price)
        else:
            log.msg('duplicate price')
            return

        send_catch_log(signal=signals.item_saved, item=item)
Ejemplo n.º 32
0
 def _process_spidermw_output(self, output, request, response, spider):
     """Handle one value (``output``) produced by the spider.

     Requests are announced with ``request_received`` and passed to the
     engine. Items are logged and announced with ``item_scraped`` through
     ``send_catch_log_deferred``; the item processor then runs on that
     deferred, which is returned. ``None`` is ignored and any other value
     is logged as an error.
     """
     if isinstance(output, Request):
         send_catch_log(signal=signals.request_received, request=output,
             spider=spider)
         self.engine.crawl(request=output, spider=spider)
         return

     if isinstance(output, BaseItem):
         scraped_msg = log.formatter.scraped(output, request, response, spider)
         log.msg(scraped_msg, level=log.DEBUG, spider=spider)
         self.sites[spider].itemproc_size += 1

         def _run_itemproc(_):
             return self.itemproc.process_item(output, spider)

         item_dfd = send_catch_log_deferred(signal=signals.item_scraped,
             item=output, spider=spider, response=response)
         item_dfd.addBoth(_run_itemproc)
         item_dfd.addBoth(self._itemproc_finished, output, spider)
         return item_dfd

     if output is not None:
         # neither a Request, a BaseItem nor None
         log.msg("Spider must return Request, BaseItem or None, got %r in %s" %
             (type(output).__name__, request), log.ERROR, spider=spider)
Ejemplo n.º 33
0
 def _process_spidermw_output(self, output, request, response, spider):
     """Dispatch a spider result according to its type.

     * ``Request``: emit ``request_received`` and pass it to the engine.
     * ``BaseItem``: log it, emit ``item_scraped`` (deferred), then run
       the item processor; the chained deferred is returned.
     * ``None``: ignored.
     * anything else: logged as an error.
     """
     is_request = isinstance(output, Request)
     if is_request:
         send_catch_log(
             signal=signals.request_received,
             request=output,
             spider=spider,
         )
         self.engine.crawl(request=output, spider=spider)
     elif isinstance(output, BaseItem):
         log.msg(
             log.formatter.scraped(output, request, response, spider),
             level=log.DEBUG,
             spider=spider,
         )
         self.sites[spider].itemproc_size += 1
         scraped_dfd = send_catch_log_deferred(
             signal=signals.item_scraped,
             item=output,
             spider=spider,
             response=response,
         )
         scraped_dfd.addBoth(
             lambda _: self.itemproc.process_item(output, spider))
         scraped_dfd.addBoth(self._itemproc_finished, output, spider)
         return scraped_dfd
     elif output is not None:
         log.msg("Spider must return Request, BaseItem or None, got %r in %s" %
             (type(output).__name__, request), log.ERROR, spider=spider)
Ejemplo n.º 34
0
 def _get_telnet_vars(self):
     """Build the dict of names exposed in the telnet console.

     ``spider`` and ``slot`` refer to the engine's only slot when exactly
     one exists and are None otherwise. The dict is passed to
     ``update_telnet_vars`` handlers (which receive it as
     ``telnet_vars``) before being returned.
     """
     # Note: if you add entries here also update topics/telnetconsole.rst
     slots = self.crawler.engine.slots
     # Default to None: these names used to be unbound (raising NameError
     # while building the dict) unless exactly one slot existed.
     spider = slot = None
     if len(slots) == 1:
         spider, slot = next(iter(slots.items()))
     telnet_vars = {
         'engine': self.crawler.engine,
         'spider': spider,
         'slot': slot,
         'manager': self.crawler,
         'extensions': self.crawler.extensions,
         'stats': stats,
         'spiders': self.crawler.spiders,
         'settings': self.crawler.settings,
         'est': lambda: print_engine_status(self.crawler.engine),
         'p': pprint.pprint,
         'prefs': print_live_refs,
         'hpy': hpy,
         'help': "This is Scrapy telnet console. For more info see: " \
             "http://doc.scrapy.org/en/latest/topics/telnetconsole.html",
     }
     send_catch_log(update_telnet_vars, telnet_vars=telnet_vars)
     return telnet_vars
Ejemplo n.º 35
0
    def _spider_idle(self, spider):
        """Called when a spider gets idle, i.e. when there are no remaining
        pages to download or schedule. It can be called multiple times.

        If a ``spider_idle`` handler raises ``DontCloseSpider`` the spider
        is kept open and ``next_request`` is scheduled again in 5 seconds,
        so this function is guaranteed to be called (at least) once more
        for this spider. Otherwise the spider is closed with reason
        "finished" if it is still idle.
        """
        results = send_catch_log(signal=signals.spider_idle, spider=spider,
                                 dont_log=DontCloseSpider)
        for _, outcome in results:
            if isinstance(outcome, Failure) and isinstance(outcome.value, DontCloseSpider):
                reactor.callLater(5, self.next_request, spider)
                return

        if self.spider_is_idle(spider):
            self.close_spider(spider, reason="finished")
Ejemplo n.º 36
0
    def _spider_idle(self, spider):
        """Called when a spider gets idle, i.e. when there are no remaining
        pages to download or schedule. It can be called multiple times.

        If a ``spider_idle`` handler raises ``DontCloseSpider`` the spider
        is kept open and its slot's ``nextcall`` is scheduled with a value
        of 5, so this function is guaranteed to be called (at least) once
        more for this spider. Otherwise the spider is closed with reason
        'finished' if it is still idle.
        """
        results = send_catch_log(signal=signals.spider_idle,
            spider=spider, dont_log=DontCloseSpider)
        keep_open = any(
            isinstance(outcome, Failure)
            and isinstance(outcome.value, DontCloseSpider)
            for _, outcome in results
        )
        if keep_open:
            self.slots[spider].nextcall.schedule(5)
            return

        if self.spider_is_idle(spider):
            self.close_spider(spider, reason='finished')
Ejemplo n.º 37
0
 def _downloaded(response):
     """Emit ``response_downloaded`` and return ``response`` unchanged.

     ``request`` and ``spider`` are free variables resolved outside this
     function.
     """
     send_catch_log(
         signal=signals.response_downloaded,
         response=response,
         request=request,
         spider=spider,
     )
     return response
Ejemplo n.º 38
0
 def _deactivate(response):
     """Emit ``response_received``, drop the request from the active set
     and close the spider if it became idle; return ``response`` unchanged.

     ``request``, ``spider``, ``site`` and ``self`` are free variables
     resolved outside this function.
     """
     send_catch_log(
         signal=signals.response_received,
         response=response,
         request=request,
         spider=spider,
     )
     site.active.remove(request)
     self._close_if_idle(spider)
     return response
Ejemplo n.º 39
0
 def _get_result(self, signal, *a, **kw):
     """Send ``signal`` through ``send_catch_log`` and return its result."""
     result = send_catch_log(signal, *a, **kw)
     return result
Ejemplo n.º 40
0
 def _get_result(self, signal, *a, **kw):
     """Return what ``send_catch_log`` yields for ``signal``.

     Extra positional and keyword arguments are forwarded untouched.
     """
     outcome = send_catch_log(signal, *a, **kw)
     return outcome
Ejemplo n.º 41
0
 def send_catch_log(self, *a, **kw):
     """Forward to ``signal.send_catch_log``, defaulting ``sender`` to
     ``self.sender`` when the caller does not pass one.
     """
     default_sender = self.sender
     if 'sender' not in kw:
         kw['sender'] = default_sender
     return signal.send_catch_log(*a, **kw)
Ejemplo n.º 42
0
 def open_spider(self, spider):
     """Start an empty stats dict for ``spider`` and emit
     ``stats_spider_opened``.
     """
     self._stats[spider] = dict()
     send_catch_log(stats_spider_opened, spider=spider)
Ejemplo n.º 43
0
 def open_spider(self, spider):
     """Reset the stats of ``spider`` to an empty dict, then signal
     ``stats_spider_opened``.
     """
     fresh_stats = {}
     self._stats[spider] = fresh_stats
     send_catch_log(stats_spider_opened, spider=spider)
Ejemplo n.º 44
0
 def _downloaded(response):
     """Signal ``response_downloaded`` for ``response`` and pass it on.

     ``request`` and ``spider`` are free variables resolved outside this
     function.
     """
     send_catch_log(signal=signals.response_downloaded, response=response,
         request=request, spider=spider)
     return response