def test_start_crawler(self, tlog, load_object, crwlr):
    """Verify start_crawler wires up logging and resolves the spider.

    The mocks (tlog, load_object, crwlr) are injected by the test
    decorators; after calling start_crawler we expect Twisted logging
    to have been started and the spider location to have been resolved
    via load_object.
    """
    location = 'ABC.ABC'
    # Flask-level export config with both export flags enabled.
    app_config = self.get_flask_export_config(True, True)
    # Per-spider Scrapy overrides: only the item-export pipeline here.
    scrapy_settings = {
        'ITEM_PIPELINES': self.get_item_export_pipeline(False, True),
    }

    start_crawler(location, app_config, scrapy_settings)

    assert tlog.startLogging.called
    load_object.assert_called_with(location)
def run_spider_endpoint(spider_name):
    """Search for the spider_name in the SPIDER_SETTINGS dict and start
    running the spider with the Scrapy API.

    Returns a JSON status payload on success, or a 404 response when no
    configured endpoint matches spider_name.
    """
    for entry in app.config['SPIDER_SETTINGS']:
        # NOTE(review): substring match, not equality — presumably
        # intentional so partial names hit; confirm against callers.
        if spider_name not in entry['endpoint']:
            continue
        location = '%s.%s' % (entry['location'], entry['spider'])
        start_crawler(location, app.config)
        return jsonify(status='<%s> running'% spider_name)
    return abort(404)
def run_spider_endpoint(spider_name):
    """Search for the spider_name in the SPIDER_SETTINGS dict and start
    running the spider with the Scrapy API.

    Per-spider Scrapy overrides (the optional 'scrapy_settings' key of
    each settings entry) are forwarded to start_crawler.
    """
    for entry in app.config['SPIDER_SETTINGS']:
        # NOTE(review): substring match, not equality — presumably
        # intentional so partial names hit; confirm against callers.
        if spider_name not in entry['endpoint']:
            continue
        location = '%s.%s' % (entry['location'], entry['spider'])
        # .get() so entries without custom settings pass None through.
        start_crawler(location, app.config, entry.get('scrapy_settings'))
        return jsonify(status='<%s> running'% spider_name)
    return abort(404)
def run_spider_endpoint(spider_name):
    """Search for the spider_name in the SPIDER_SETTINGS dict and start
    running the spider with the Scrapy API.

    .. version 0.4.0:
        endpoint returns the `status` as `running` and a way to go back
        to the `home` endpoint
    """
    for entry in app.config['SPIDER_SETTINGS']:
        # NOTE(review): substring match, not equality — presumably
        # intentional so partial names hit; confirm against callers.
        if spider_name not in entry['endpoint']:
            continue
        location = '%s.%s' % (entry['location'], entry['spider'])
        # .get() so entries without custom settings pass None through.
        start_crawler(location, app.config, entry.get('scrapy_settings'))
        return jsonify(home=request.url_root,
                       status='running',
                       spider_name=spider_name)
    return abort(404)
def test_start_crawler(self):
    """Verify start_crawler on legacy Scrapy (<= 1.0.0).

    Patches the Twisted logger, load_object, and Crawler inside
    arachne.scrapy_utils, then checks that logging is started and the
    spider location is resolved via load_object.
    """
    if SCRAPY_VERSION > (1, 0, 0):
        # Legacy-only code path; nothing to check on newer Scrapy.
        return

    with patch('arachne.scrapy_utils.tlog') as tlog, \
         patch('arachne.scrapy_utils.load_object') as load_object, \
         patch('arachne.scrapy_utils.Crawler'):
        location = 'ABC.ABC'
        # Flask-level export config with both export flags enabled.
        app_config = get_flask_export_config(True, True)
        # Per-spider Scrapy overrides: only the export extension here.
        scrapy_settings = {
            'EXTENSIONS': get_item_export_extension(False, True),
        }

        start_crawler(location, app_config, scrapy_settings)

        assert tlog.startLogging.called
        load_object.assert_called_with(location)