Example #1
class Executor(object):
    def __init__(self, settings, spider_cls_list):
        self.runner = CrawlerRunner(settings)
        self.spider_cls_list = spider_cls_list
        self.logger = logging.getLogger('Executor')
        self.interrupted = False

        dispatcher.connect(self._handle_spider_close, signal=signals.spider_closed)

        signal.signal(signal.SIGTERM, self._handler_sys_signal)
        signal.signal(signal.SIGINT, self._handler_sys_signal)

    @defer.inlineCallbacks
    def _crawl(self):
        while True:
            finished = True

            for spider_cls in self.spider_cls_list:
                if self.interrupted:
                    self.logger.info('interrupted, cancel pending jobs')
                    break

                if spider_cls.get_dataset().count() > 0:
                    finished = False
                    self.logger.info('Starting {:s}'.format(spider_cls.__name__))

                    yield self.runner.crawl(spider_cls)

            if finished or self.interrupted:
                break

        reactor.stop()
        self.logger.info('finished')

        if not self.interrupted:
            send_mail('finished')

    def execute(self):
        self._crawl()
        reactor.run(installSignalHandlers=False)

    def _handle_spider_close(self, spider, reason):
        self.logger.info('Spider "{:s}" stopped with reason "{:s}"'.format(spider.name, reason))

        if reason not in ['finished', 'shutdown']:
            self.logger.error('Spider stopped abnormally. Stopping CrawlerRunner')

            self.interrupted = True
            self.runner.stop()

            send_mail(reason)

    def _handler_sys_signal(self, signum, frame):
        self.logger.info('Received signal {}. Stopping CrawlerRunner'.format(signum))
        self.interrupted = True
        self.runner.stop()
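
A minimal driver sketch for the Executor above. The module path and spider names are assumptions, not part of the original project: Executor expects each spider class to expose the get_dataset() hook used in _crawl(), and send_mail is assumed to be defined alongside it.

from scrapy.utils.project import get_project_settings

# NewsSpider and ReviewSpider are hypothetical project spiders exposing get_dataset().
from myproject.spiders import NewsSpider, ReviewSpider

if __name__ == '__main__':
    executor = Executor(get_project_settings(), [NewsSpider, ReviewSpider])
    executor.execute()   # starts _crawl() and blocks in reactor.run()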
Example #2
class CrawlerRunnerTest(unittest.TestCase):

    def setUp(self):
        self.crawler_runner = CrawlerRunner(Settings())

    def tearDown(self):
        return self.crawler_runner.stop()

    @defer.inlineCallbacks
    def test_populate_spidercls_settings(self):
        spider_settings = {'TEST1': 'spider', 'TEST2': 'spider'}
        project_settings = {'TEST1': 'project', 'TEST3': 'project'}

        class CustomSettingsSpider(DefaultSpider):
            custom_settings = spider_settings

        self.crawler_runner.settings.setdict(project_settings,
                                             priority='project')

        d = self.crawler_runner.crawl(CustomSettingsSpider)
        crawler = list(self.crawler_runner.crawlers)[0]
        yield d
        self.assertEqual(crawler.settings.get('TEST1'), 'spider')
        self.assertEqual(crawler.settings.get('TEST2'), 'spider')
        self.assertEqual(crawler.settings.get('TEST3'), 'project')
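
The test above references DefaultSpider without showing it; in Scrapy's own test suite it is just a trivial spider. A minimal stand-in, offered as an assumption rather than the original definition:

from scrapy import Spider


class DefaultSpider(Spider):
    # No start URLs: the crawl finishes immediately, which is enough for
    # checking how spider, project and default settings are merged.
    name = 'default_spider'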
Example #3
class Bot(Resource):
    spider = SlydSpider()

    def __init__(self, settings, spec_manager):
        # twisted base class is old-style so we cannot use super()
        Resource.__init__(self)
        self.spec_manager = spec_manager
        settings.set('PLUGINS', [p['bot'] for p in settings.get('PLUGINS')])
        self.runner = CrawlerRunner(settings)
        log.msg("bot initialized", level=log.DEBUG)

    def keep_spider_alive(self, spider):
        raise DontCloseSpider("keeping it open")

    def stop(self):
        """Stop the crawler"""
        self.runner.stop()
        log.msg("bot stopped", level=log.DEBUG)
Example #4
class Runner(object):
    def __init__(self, *args, **kwargs):
        configure_logging()
        self.settings = get_project_settings()
        self.runner = CrawlerRunner(self.settings)

    def add(self, *a, **kw):
        crawler = Crawler(BroadSpider, self.settings)
        self.runner.crawl(crawler, *a, **kw)

    def start(self):
        d = self.runner.join()
        d.addBoth(lambda _: reactor.stop())
        reactor.run()

    def stop(self):
        self.runner.stop()
        reactor.stop()
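
A short usage sketch for this Runner; add() may be called several times to queue additional BroadSpider crawls before start():

runner = Runner()
runner.add()     # queue one BroadSpider crawl
runner.add()     # a second crawl runs alongside the first in the same reactor
runner.start()   # blocks until the queued crawls finish, then stops the reactor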
Example #5
class checkpointed_crawler(object):
    """ Crawl with checkpointing enabled for crash recovery. Multiple instances
    of this class enable multiple spiders to crawl simultaneously within a
    single process.

    Args:
        spider_name     name of spider within this project
        cp_interval     interval, in seconds, between checkpoints;
                        expected to be in 100's at the minimum
        add_settings    additional settings specific to this spider
    Preconditions:
        JOBDIR must be set either in add_settings or in settings.py
    """

    # state of crawler
    FINISHED = 'finished'
    RUNNING = 'running'

    def __init__(self, spider_name: str, cp_interval: int,
                 add_settings: dict = {}):
        self._cps = 0
        self._cp_int = cp_interval
        self._cp_path = None
        self._runner = None
        self._settings = get_project_settings().copy()
        self._spider = spider_name
        self._state = None

        configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})

        # update settings
        for k, v in add_settings.items():
            self._settings[k] = v

        # set path to the checkpoint folder (JOBDIR may come from settings.py
        # or from add_settings; both have been merged into self._settings above)
        if 'JOBDIR' not in self._settings:
            raise KeyError('JOBDIR not set')
        self._cp_path = self._settings['JOBDIR']

        # restore checkpoint, if any
        logger.info("Restoring checkpoint: {}".format(self._cp_path))
        checkpoint.restore_checkpoint(self._cp_path)
        logger.info("Checkpoint successfully restored: {}"
                    .format(self._cp_path))

        # create runner
        self._runner = CrawlerRunner(self._settings)

        # start crawling
        self._start_crawling()

    def _start_crawling(self):
        # start crawling
        dfd_finish = self._runner.crawl(self._spider)
        self._state = self.RUNNING

        # add finishing deferred call
        dfd_finish.addCallback(self._finish)

        # schedule a checkpoint
        reactor.callLater(self._cp_int, self._stop)

    def _finish(self, _):
        # ignore triggers due to checkpointing
        if self._cps > 0:
            self._cps -= 1
            return

        # clear saved checkpoints
        logger.info("Clearing checkpoint: {}".format(self._cp_path))
        checkpoint.clear_checkpoint(self._cp_path)
        logger.info("Successfully cleared checkpoint: {}"
                    .format(self._cp_path))
        self._state = self.FINISHED

        # stop reactor
        reactor.stop()

    def _stop(self):
        # ignore event if crawler has finished
        if self._state == self.FINISHED:
            return

        # stop crawling and trigger checkpointing
        self._cps += 1
        dfd_stop = self._runner.stop()
        dfd_stop.addCallback(self._checkpoint)

    def _checkpoint(self, _):
        # checkpoint state
        logger.info("Creating checkpoint: {}".format(self._cp_path))
        checkpoint.create_checkpoint(self._cp_path)
        logger.info("Successfully created checkpoint: {}"
                    .format(self._cp_path))

        # start crawling again
        self._start_crawling()

    @staticmethod
    def start_crawler():
        """ Start executing all the crawlers.
        """
        reactor.run()
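
A usage sketch following the docstring above; the spider names, checkpoint interval and JOBDIR paths are assumptions:

# Each instance starts its own crawl with a private JOBDIR for its checkpoints.
crawler_a = checkpointed_crawler('news_spider', cp_interval=600,
                                 add_settings={'JOBDIR': 'crawls/news'})
crawler_b = checkpointed_crawler('docs_spider', cp_interval=600,
                                 add_settings={'JOBDIR': 'crawls/docs'})

# Run the shared Twisted reactor; returns once every crawler has finished.
checkpointed_crawler.start_crawler()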
Example #6
class ScrAPI(Flask):
    def __init__(self, import_name=__package__, **kwargs):
        super(ScrAPI, self).__init__(import_name, **kwargs)
        configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})
        self._init_url_rules()
        self.process = CrawlerRunner(get_project_settings())
        self.tp = reactor.getThreadPool()
        self.database = DatabaseConnector(DATABASE_URL)
        self.response_meta = {"meta": {"project": "WSF Web Scraper"}}

    def __del__(self):
        self.database._close_all_spiders()
        self.database.cursor.close()
        self.database.connection.close()

    def run(self, host=None, port=None, debug=None, **options):
        super(ScrAPI, self).run(host, port, debug, **options)

    def _get_meta_response(self, res):
        res.update(self.response_meta)
        return res

    def _init_url_rules(self):
        """Attach the endpoints to run spiders and list the spiders
        that are available in the API
        """

        self.add_url_rule(
            '/spiders',
            view_func=self.list_spiders,
            methods=['GET'],
        )
        self.add_url_rule(
            '/spiders',
            view_func=self.run_spider,
            methods=['POST'],
        )
        self.add_url_rule(
            '/spiders/<string:spider_id>',
            view_func=self.close_spider,
            methods=['DELETE'],
        )
        self.add_url_rule(
            '/database',
            view_func=self.import_db,
            methods=['POST'],
        )
        self.add_url_rule(
            '/database',
            view_func=self.export_db,
            methods=['GET'],
        )
        self.add_url_rule(
            '/database',
            view_func=self.clear_scraps,
            methods=['DELETE'],
        )
        self.add_url_rule(
            '/crawls',
            view_func=self.list_crawls,
            methods=['GET'],
        )
        self.add_url_rule(
            '/crawls',
            view_func=self.stop,
            methods=['DELETE'],
        )
        self.add_url_rule(
            '/',
            view_func=self.home,
            methods=['GET'],
        )

    def home(self):
        routes = [{
            "url": "/spiders",
            "method": "GET"
        }, {
            "url": "/spiders",
            "method": "POST",
            "arguments": {
                "spider": "name of the spider to run"
            }
        }, {
            "url": "/spiders/:spider_id",
            "method": "DELETE",
            "arguments": {
                "spider_id": "uuid of the spider to close"
            }
        }, {
            "url": "/crawls",
            "method": "GET"
        }, {
            "url": "/crawls",
            "method": "DELETE"
        }, {
            "url": "/database",
            "method": "GET"
        }, {
            "url": "/database",
            "method": "POST",
            "arguments": {
                "file": "json file containing the database dump"
            }
        }, {
            "url": "/database",
            "method": "DELETE"
        }]
        result = self._get_meta_response({"routes": routes})
        return jsonify(result), 200

    def list_spiders(self):
        spiders = self.process.spider_loader.list()
        return jsonify({"spiders": spiders, "status": "success"}), 200

    def run_spider(self):
        post_data = request.get_json()
        spider = post_data.get('spider')
        if spider == 'who_iris':
            spider = who_iris_spider.WhoIrisSpider()
        elif spider == 'nice':
            spider = nice_spider.NiceSpider()
        else:
            return '', 404
        spider_id = str(uuid.uuid4())
        self.process.crawl(spider, uuid=spider_id)
        crawl = self.process.join()
        self.database.insert_spider(spider.name, spider_id)
        crawl.addBoth(self.on_success)
        return jsonify({
            "data": {
                "status": "running",
                "spider": spider.name,
                "_id": spider_id
            }
        }), 200

    def on_success(self, data):
        self.database._close_all_spiders()

    def close_spider(self, spider_id):
        for crawl in self.process.crawlers:
            if crawl.spider.uuid == spider_id:
                crawl.stop()
                return jsonify(
                    {"data": {
                        "status": "success",
                        "_id": spider_id
                    }}), 200
        return '', 400

    def list_crawls(self):
        crawls = self.process.crawlers
        running_spiders = []
        for crawl in crawls:
            start_time = crawl.stats.get_value('start_time')
            spider = {
                '_id': crawl.spider.uuid,
                'spider': crawl.spider.name,
                'start_time': start_time,
                'total_time': str(datetime.now() - start_time),
                'item_dropped': crawl.stats.get_value('item_dropped_count'),
                'item_scraped': crawl.stats.get_value('item_scraped_count'),
                'total_requests': crawl.stats.get_value('downloader/request_count'),
            }

            running_spiders.append(spider)
        finished_spiders = []
        for spider in self.database.get_finished_crawls():
            finished_spiders.append(spider)
        spiders = {"crawling": running_spiders, "finished": finished_spiders}
        return jsonify({"data": {"spiders": spiders}}), 200

    def stop(self):
        self.process.stop()
        return jsonify({"data": {"status": "success"}}), 200

    def export_db(self):
        articles_rows = self.database.get_articles()
        articles = []
        now = datetime.now()
        for title, file_hash, url in articles_rows:
            articles.append({
                'title': title,
                'file_hash': file_hash,
                'url': url,
            })
        json_file = tempfile.NamedTemporaryFile()
        json_file.write(json.dumps(articles).encode('utf-8'))
        json_file.seek(0)
        return send_file(json_file,
                         mimetype='application/json',
                         as_attachment=True,
                         attachment_filename=f'export-{now}.json')

    def import_db(self):
        if request.files:
            data_file = request.files.get('file')
            if data_file.filename == '':
                return 'Filename must not be blank', 400
            if data_file.content_type == 'application/json':
                json_file = data_file.stream.read()
            else:
                return 'File format is not json.', 415

            try:
                json_dict = json.loads(json_file)
                for article in json_dict:
                    self.database.insert_article(article.get('title'),
                                                 article.get('file_hash'),
                                                 article.get('url'))

                return '', 201
            except Exception as e:
                result = {"errors": [str(e)]}
                return jsonify(result), 400
        else:
            return 'No JSON file in request', 400

    def clear_scraps(self):
        try:
            self.database.reset_scraped()
            return '', 204
        except Exception as e:
            return str(e), 500
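
A client-side sketch of the endpoints registered in _init_url_rules(), assuming the Flask app is served on localhost:5000 and using the requests library; the spider name comes from run_spider() above:

import requests

BASE = 'http://localhost:5000'   # assumed host and port

print(requests.get(BASE + '/spiders').json())                    # list spiders
run = requests.post(BASE + '/spiders', json={'spider': 'who_iris'}).json()
spider_id = run['data']['_id']
print(requests.get(BASE + '/crawls').json())                     # crawl status
requests.delete(BASE + '/spiders/' + spider_id)                  # close one spider
requests.delete(BASE + '/crawls')                                # CrawlerRunner.stop()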
Example #7
class FronteraSchedulerTest(TestCase):
    def setUp(self):
        self.runner = CrawlerRunner()

    def tearDown(self):
        self.runner.stop()
        while TestDownloadHandler.results:
            TestDownloadHandler.results.pop()

    @defer.inlineCallbacks
    def test_start_requests(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results([
                Response(url='http://example.com'),
                Response(url='http://example2.com')
            ])

            with patch(
                    'frontera.contrib.backends.memory.MemoryBaseBackend.links_extracted'
            ) as mocked_links_extracted:
                mocked_links_extracted.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                crawler = get_crawler(TestSpider, settings)

                yield self.runner.crawl(crawler)
                self.assertTrue(crawler.spider.success)
                self.assertTrue(crawler.spider.success2)
                mocked_links_extracted.assert_not_called()

    @defer.inlineCallbacks
    def test_cf_store(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results(
                [Response(url='http://example.com', body=b'cf_store')])

            with patch(
                    'frontera.contrib.backends.memory.MemoryDequeQueue.schedule'
            ) as mocked_schedule:
                mocked_schedule.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                crawler = get_crawler(TestSpider, settings)

                yield self.runner.crawl(crawler)
                self.assertTrue(crawler.spider.success)
                self.assertEqual(mocked_schedule.call_count, 1)

    @defer.inlineCallbacks
    def test_callback_requests_to_frontier(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results(
                [Response(url='http://example.com')])

            with patch(
                    'frontera.contrib.backends.memory.MemoryDequeQueue.schedule'
            ) as mocked_schedule:
                mocked_schedule.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                settings.setdict({
                    'FRONTERA_SCHEDULER_REQUEST_CALLBACKS_TO_FRONTIER':
                    ['parse2'],
                })
                crawler = get_crawler(TestSpider2, settings)

                yield self.runner.crawl(crawler)
                self.assertTrue(crawler.spider.success)
                self.assertFalse(crawler.spider.success2)
                self.assertEqual(mocked_schedule.call_count, 1)

    @defer.inlineCallbacks
    def test_callback_requests_to_frontier_with_implicit_callback(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results([
                Response(url='http://example.com'),
                Response(url='http://example2.com')
            ])

            with patch(
                    'frontera.contrib.backends.memory.MemoryDequeQueue.schedule'
            ) as mocked_schedule:
                mocked_schedule.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                settings.setdict({
                    'FRONTERA_SCHEDULER_REQUEST_CALLBACKS_TO_FRONTIER':
                    ['parse'],
                })
                crawler = get_crawler(TestSpider3, settings)

                yield self.runner.crawl(crawler)
                self.assertEqual(crawler.spider.success, 1)
                self.assertEqual(mocked_schedule.call_count, 1)

    @defer.inlineCallbacks
    def test_callback_requests_slot_map(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            resp1 = Response(url='http://example.com')
            resp2 = Response(url='http://example2.com')
            mocked_handler.return_value.set_results([resp1, resp2])

            with patch(
                    'frontera.contrib.backends.memory.MemoryDequeQueue.schedule'
            ) as mocked_schedule:
                mocked_schedule.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                settings.setdict({
                    'FRONTERA_SCHEDULER_REQUEST_CALLBACKS_TO_FRONTIER':
                    ['parse'],
                    'FRONTERA_SCHEDULER_CALLBACK_SLOT_PREFIX_MAP': {
                        'parse': 'myslot'
                    },
                })
                crawler = get_crawler(TestSpider3, settings)

                yield self.runner.crawl(crawler)
                self.assertEqual(crawler.spider.success, 1)
                self.assertEqual(mocked_schedule.call_count, 1)
                frontera_request = mocked_schedule.call_args_list[0][0][0][0][2]
                self.assertEqual(frontera_request.url, resp2.url)
                self.assertEqual(
                    frontera_request.meta[b'frontier_slot_prefix'], 'myslot')

    @defer.inlineCallbacks
    def test_callback_requests_slot_map_with_num_slots(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            resp1 = Response(url='http://example.com')
            resp2 = Response(url='http://example2.com')
            mocked_handler.return_value.set_results([resp1, resp2])

            with patch(
                    'frontera.contrib.backends.memory.MemoryDequeQueue.schedule'
            ) as mocked_schedule:
                mocked_schedule.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                settings.setdict({
                    'FRONTERA_SCHEDULER_REQUEST_CALLBACKS_TO_FRONTIER':
                    ['parse'],
                    'FRONTERA_SCHEDULER_CALLBACK_SLOT_PREFIX_MAP': {
                        'parse': 'myslot/5'
                    },
                })
                crawler = get_crawler(TestSpider3, settings)

                yield self.runner.crawl(crawler)
                self.assertEqual(crawler.spider.success, 1)
                self.assertEqual(mocked_schedule.call_count, 1)
                frontera_request = mocked_schedule.call_args_list[0][0][0][0][2]
                self.assertEqual(frontera_request.url, resp2.url)
                self.assertEqual(
                    frontera_request.meta[b'frontier_slot_prefix'], 'myslot')
                self.assertEqual(
                    frontera_request.meta[b'frontier_number_of_slots'], 5)

    @defer.inlineCallbacks
    def test_start_requests_to_frontier(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results([
                Response(url='http://example.com'),
                Response(url='http://example2.com')
            ])

            settings = Settings()
            settings.setdict(TEST_SETTINGS, priority='cmdline')
            settings.setdict({
                'FRONTERA_SCHEDULER_START_REQUESTS_TO_FRONTIER':
                True,
            })
            crawler = get_crawler(TestSpider, settings)

            yield self.runner.crawl(crawler)
            self.assertTrue(crawler.spider.success)
            self.assertTrue(crawler.spider.success2)

    @defer.inlineCallbacks
    def test_start_requests_to_frontier_ii(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()

            with patch(
                    'frontera.contrib.backends.memory.MemoryBaseBackend.add_seeds'
            ) as mocked_add_seeds:
                mocked_add_seeds.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                settings.setdict({
                    'FRONTERA_SCHEDULER_START_REQUESTS_TO_FRONTIER':
                    True,
                })

                crawler = get_crawler(TestSpider, settings)

                yield self.runner.crawl(crawler)
                self.assertEqual(mocked_add_seeds.call_count, 1)

    @defer.inlineCallbacks
    def test_start_handle_errback(self):
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results([
                Response(url='http://example.com'),
                Response(url='http://example2.com', status=501),
                Response(url='http://example3.com')
            ])

            settings = Settings()
            settings.setdict(TEST_SETTINGS, priority='cmdline')
            crawler = get_crawler(TestSpider, settings)

            yield self.runner.crawl(crawler)
            self.assertTrue(crawler.spider.success)
            self.assertFalse(crawler.spider.success2)
            self.assertTrue(crawler.spider.error)
            self.assertTrue(crawler.spider.success3)

    @defer.inlineCallbacks
    def test_start_handle_errback_with_cf_store(self):
        """
        Test that we get the expected result with errback cf_store
        """
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results([
                Response(url='http://example.com'),
                Response(url='http://example2.com',
                         status=501,
                         body=b'cf_store'),
                Response(url='http://example3.com')
            ])

            settings = Settings()
            settings.setdict(TEST_SETTINGS, priority='cmdline')
            crawler = get_crawler(TestSpider, settings)

            yield self.runner.crawl(crawler)
            self.assertTrue(crawler.spider.success)
            self.assertFalse(crawler.spider.success2)
            self.assertTrue(crawler.spider.error)
            self.assertTrue(crawler.spider.success3)

    @defer.inlineCallbacks
    def test_start_handle_errback_with_cf_store_ii(self):
        """
        Test that we scheduled cf_store request on backend queue
        """
        with patch(
                'scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler'
        ) as mocked_handler:
            mocked_handler.return_value = TestDownloadHandler()
            mocked_handler.return_value.set_results([
                Response(url='http://example.com'),
                Response(url='http://example2.com',
                         status=501,
                         body=b'cf_store'),
                Response(url='http://example3.com')
            ])

            with patch(
                    'frontera.contrib.backends.memory.MemoryDequeQueue.schedule'
            ) as mocked_schedule:
                mocked_schedule.return_value = None
                settings = Settings()
                settings.setdict(TEST_SETTINGS, priority='cmdline')
                crawler = get_crawler(TestSpider, settings)

                yield self.runner.crawl(crawler)
                self.assertTrue(crawler.spider.success)
                self.assertFalse(crawler.spider.success2)
                self.assertTrue(crawler.spider.error)
                self.assertEqual(mocked_schedule.call_count, 1)
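
These scheduler tests rely on a TestDownloadHandler stub and TEST_SETTINGS defined elsewhere in the test module. A minimal sketch of what such a handler might look like, inferred only from the set_results() and class-level results usage above and from the general shape of Scrapy download handlers; treat it as an assumption, not the original helper:

from twisted.internet import defer


class TestDownloadHandler:
    # Canned responses kept at class level so tearDown() can drain them.
    results = []

    def __init__(self, settings=None):
        pass

    def set_results(self, results):
        self.results.extend(results)

    def download_request(self, request, spider):
        # A Scrapy download handler returns a Deferred firing with a Response.
        response = self.results.pop(0)
        response.request = request
        return defer.succeed(response)

    def close(self):
        return defer.succeed(None)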