def test_getcomposite(self):
    settings = BaseSettings({
        'TEST_BASE': {1: 1, 2: 2},
        'TEST': BaseSettings({1: 10, 3: 30}, 'default'),
        'HASNOBASE': BaseSettings({1: 1}, 'default'),
    })
    settings['TEST'].set(4, 4, priority='project')
    # When users specify a _BASE setting they explicitly don't want to use
    # Scrapy's defaults, so nothing with 'default' priority from TEST may
    # appear in the composition.
    composed = settings._getcomposite('TEST')
    self.assertEqual(len(composed), 3)
    self.assertEqual(composed[1], 1)
    self.assertEqual(composed[2], 2)
    self.assertEqual(composed[4], 4)
    composed = settings._getcomposite('HASNOBASE')
    self.assertEqual(len(composed), 1)
    self.assertEqual(composed[1], 1)
    # Unknown names compose to None rather than raising.
    self.assertIsNone(settings._getcomposite('NONEXISTENT'))
def test_copy_to_dict(self):
    settings = BaseSettings({
        'TEST_STRING': 'a string',
        'TEST_LIST': [1, 2],
        'TEST_BOOLEAN': False,
        'TEST_BASE': BaseSettings({1: 1, 2: 2}, 'project'),
        'TEST': BaseSettings({1: 10, 3: 30}, 'default'),
        'HASNOBASE': BaseSettings({3: 3000}, 'default'),
    })
    # copy_to_dict() must unwrap nested BaseSettings into plain dicts
    # while leaving scalar values untouched.
    expected = {
        'HASNOBASE': {3: 3000},
        'TEST': {1: 10, 3: 30},
        'TEST_BASE': {1: 1, 2: 2},
        'TEST_BOOLEAN': False,
        'TEST_LIST': [1, 2],
        'TEST_STRING': 'a string',
    }
    self.assertDictEqual(settings.copy_to_dict(), expected)
def test_update(self):
    settings = BaseSettings({'key_lowprio': 0}, priority=0)
    settings.set('key_highprio', 10, priority=50)

    other = BaseSettings({'key_lowprio': 1, 'key_highprio': 11},
                         priority=30)
    other.set('newkey_one', None, priority=50)
    plain = {'key_lowprio': 2, 'key_highprio': 12, 'newkey_two': None}

    # A plain dict updates every key at the priority given to update().
    settings.update(plain, priority=20)
    self.assertEqual(settings['key_lowprio'], 2)
    self.assertEqual(settings.getpriority('key_lowprio'), 20)
    self.assertEqual(settings['key_highprio'], 10)
    self.assertIn('newkey_two', settings)
    self.assertEqual(settings.getpriority('newkey_two'), 20)

    # A BaseSettings argument carries its own per-key priorities.
    settings.update(other)
    self.assertEqual(settings['key_lowprio'], 1)
    self.assertEqual(settings.getpriority('key_lowprio'), 30)
    self.assertEqual(settings['key_highprio'], 10)
    self.assertIn('newkey_one', settings)
    self.assertEqual(settings.getpriority('newkey_one'), 50)

    # A lower-priority update can no longer displace the stored value.
    settings.update({'key_lowprio': 3}, priority=20)
    self.assertEqual(settings['key_lowprio'], 1)
def from_crawler(cls, crawler: Crawler):
    """Build the per-spider settings overlay and install it on the crawler.

    Seeds defaults from the spider class's ``SpiderConfig``, overlays
    matching command-line values and an optional ``PRESET`` file, then
    passes every value through a matching ``SettingsAdapter`` hook before
    writing the result back into ``crawler.settings``.
    """
    base_settings: BaseSettings = crawler.settings
    cls.normalize(base_settings)
    # Seed from the spider class's own config at 'spider' priority.
    settings = BaseSettings(priority='spider')
    cls.from_object(settings, crawler.spidercls.SpiderConfig)
    # Only keys already declared by SpiderConfig may be overridden from
    # the crawler-level settings.
    settings.update(
        {k: v for k, v in base_settings.items() if k in settings},
        priority='cmdline')
    preset = base_settings.get('PRESET')
    if preset:
        # Preset file values go in at priority 35 — presumably between
        # 'spider' and 'cmdline'; confirm against the priority table.
        preset_dict = BaseSettings(priority=35)
        cls.from_pyfile(preset_dict, preset)
        settings.update(preset_dict)
    # Let SettingsAdapter hooks (looked up by lower-cased key) rewrite
    # values; keys without a hook pass through unchanged.
    adapted = BaseSettings(priority=50)
    for k, v in settings.items():
        adapt = getattr(SettingsAdapter, k.lower(), None)
        if adapt:
            adapted.update(adapt(v))
        else:
            adapted[k] = v
    base_settings.update(adapted.copy_to_dict(), priority=50)
    # Keep the adapted BaseSettings around for later per-key access.
    base_settings['SPIDER_CONFIG'] = adapted
    return cls()
def test_update_jsonstring(self):
    settings = BaseSettings({'number': 0,
                             'dict': BaseSettings({'key': 'val'})})
    # JSON strings are parsed and merged like dicts.
    settings.update('{"number": 1, "newnumber": 2}')
    self.assertEqual(settings['number'], 1)
    self.assertEqual(settings['newnumber'], 2)
    # set() also accepts a JSON string for a nested BaseSettings value.
    settings.set("dict", '{"key": "newval", "newkey": "newval2"}')
    self.assertEqual(settings['dict']['key'], "newval")
    self.assertEqual(settings['dict']['newkey'], "newval2")
def test_getwithbase(self):
    settings = BaseSettings({
        'TEST_BASE': BaseSettings({1: 1, 2: 2}, 'project'),
        'TEST': BaseSettings({1: 10, 3: 30}, 'default'),
        'HASNOBASE': BaseSettings({3: 3000}, 'default'),
    })
    settings['TEST'].set(2, 200, 'cmdline')
    # The plain setting's entries overlay the _BASE entries.
    six.assertCountEqual(self, settings.getwithbase('TEST'),
                         {1: 1, 2: 200, 3: 30})
    # Without a _BASE counterpart the setting is returned as-is.
    six.assertCountEqual(self, settings.getwithbase('HASNOBASE'),
                         settings['HASNOBASE'])
    # Unknown names yield an empty composition, not an error.
    self.assertEqual(settings.getwithbase('NONEXISTENT'), {})
def test_repr(self):
    settings = BaseSettings()
    # Empty container has a minimal repr.
    self.assertEqual(repr(settings), "<BaseSettings {}>")
    attribute = SettingsAttribute('testval', 15)
    settings['testkey'] = attribute
    # The stored attribute's own repr is embedded verbatim.
    self.assertEqual(repr(settings),
                     "<BaseSettings {'testkey': %s}>" % repr(attribute))
def test_overwrite_basesettings(self):
    initial_dict = {'one': 10, 'two': 20}
    initial_settings = BaseSettings(initial_dict, 0)
    attribute = SettingsAttribute(initial_settings, 0)

    replacement = {'three': 11, 'four': 21}
    attribute.set(replacement, 10)
    # A plain dict replacing a BaseSettings value is promoted to
    # BaseSettings; the original instance must be left untouched.
    self.assertIsInstance(attribute.value, BaseSettings)
    six.assertCountEqual(self, attribute.value, replacement)
    six.assertCountEqual(self, initial_settings, initial_dict)

    newer_settings = BaseSettings({'five': 12}, 0)
    attribute.set(newer_settings, 0)  # insufficient priority, ignored
    six.assertCountEqual(self, attribute.value, replacement)
    attribute.set(newer_settings, 10)
    six.assertCountEqual(self, attribute.value, newer_settings)
def __init__(self, values=None, priority='project'):
    """Load the module defaults, then overlay *values* at *priority*."""
    super().__init__()
    self.setmodule(aio_settings, 'default')
    # Wrap plain-dict defaults in BaseSettings so nested entries carry
    # their own per-key priorities.
    for key, value in self.items():
        if isinstance(value, dict):
            self.set(key, BaseSettings(value, 'default'), 'default')
    self.update(values, priority)
def test_set_per_key_priorities(self):
    attribute = SettingsAttribute(
        BaseSettings({'one': 10, 'two': 20}, 0), 0)

    attribute.set({'one': 11, 'two': 21}, 10)
    self.assertEqual(attribute.value['one'], 11)
    self.assertEqual(attribute.value['two'], 21)

    # When a BaseSettings is assigned, each key keeps its own priority:
    # 'one' (prio 20) wins, while 'two' (prio 0) loses to the existing
    # prio-10 entry.
    per_key = BaseSettings()
    per_key.set('one', 12, 20)
    per_key.set('two', 12, 0)
    attribute.set(per_key, 0)
    self.assertEqual(attribute.value['one'], 12)
    self.assertEqual(attribute.value['two'], 21)
def test_delete(self):
    settings = BaseSettings({'key': None})
    settings.set('key_highprio', None, priority=50)
    # delete() is priority-aware: it removes 'key' but cannot displace
    # the priority-50 entry.
    settings.delete('key')
    settings.delete('key_highprio')
    self.assertNotIn('key', settings)
    self.assertIn('key_highprio', settings)
    # __delitem__ removes the entry regardless of priority.
    del settings['key_highprio']
    self.assertNotIn('key_highprio', settings)
def setUp(self):
    # Shared fixture settings for every test in this case.
    # NOTE(review): 'KENISIS...' spelling kept as-is — presumably the
    # consuming code reads the same key; verify before "fixing".
    self.settings = BaseSettings({
        'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
        'KINESISSTREAM_NAME': 'kinesis-stream',
        'KENISISPARTITION_KEY': 'kinesis-partition-key',
    })
def test_update_settings_per_key_priorities_new_behaviour():
    from scrapy.settings import BaseSettings
    entrypoints = EntrypointSettings()
    entrypoints.set('ITEM_PIPELINES', BaseSettings())
    # Successive updates on the nested BaseSettings accumulate entries
    # instead of replacing the whole mapping.
    entrypoints['ITEM_PIPELINES'].update({'test.path1': 100})
    entrypoints['ITEM_PIPELINES'].update({'test.path2': 200})
    expected = {'test.path1': 100, 'test.path2': 200}
    assert dict(entrypoints['ITEM_PIPELINES']) == expected
def test_json_gz(self):
    settings = BaseSettings({
        'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.json.gz',
        'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
    })
    pipeline = S3Pipeline(settings, None)
    # A ".json.gz" URL suffix enables gzip and picks the JSON exporter.
    self.assertTrue(pipeline.use_gzip)
    self.assertEqual(pipeline.exporter_cls, JsonItemExporter)
def test_setitem(self):
    settings = BaseSettings()
    settings.set('key', 'a', 'default')
    # Plain item assignment overwrites the value and records priority 20.
    settings['key'] = 'b'
    self.assertEqual(settings['key'], 'b')
    self.assertEqual(settings.getpriority('key'), 20)
    settings['key'] = 'c'
    self.assertEqual(settings['key'], 'c')
    # Assignment also works for keys that do not exist yet.
    settings['key2'] = 'x'
    self.assertIn('key2', settings)
    self.assertEqual(settings['key2'], 'x')
    self.assertEqual(settings.getpriority('key2'), 20)
def getwithbase(self, name):
    """Get a composition of a dictionary-like setting and its `_BASE`
    counterpart.

    The result is built by layering ``<name>_BASE``, then ``<name>``
    itself, then ``<name>_USER`` — so keys from later layers take
    precedence over earlier ones (subject to whatever per-key priority
    semantics ``BaseSettings.update`` applies).

    :param name: name of the dictionary-like setting
    :type name: str
    """
    compbs = BaseSettings()
    compbs.update(self[name + '_BASE'])
    compbs.update(self[name])
    compbs.update(self[name + '_USER'])
    return compbs
def test_max_chunk_size(self):
    settings = BaseSettings({
        'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
        'S3PIPELINE_MAX_CHUNK_SIZE': 1000,
        'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
    })
    # The explicit setting must override the pipeline's default.
    pipeline = S3Pipeline(settings, None)
    self.assertEqual(pipeline.max_chunk_size, 1000)
def test_force_no_gzip(self):
    settings = BaseSettings({
        'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
        'S3PIPELINE_GZIP': False,
        'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
    })
    # An explicit S3PIPELINE_GZIP=False wins over the ".gz" URL suffix.
    pipeline = S3Pipeline(settings, None)
    self.assertFalse(pipeline.use_gzip)
def test_max_wait_upload_time(self):
    settings = BaseSettings({
        'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
        'S3PIPELINE_MAX_WAIT_UPLOAD_TIME': 300,
        'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
    })
    # The explicit setting must override the pipeline's default.
    pipeline = S3Pipeline(settings, None)
    self.assertEqual(pipeline.max_wait_upload_time, 300)
def close(spider, reason):
    """Send a notification e-mail when the spider closes.

    :param spider: the spider being closed (unused beyond the signal
        contract)
    :param reason: closure reason string, used as the message body
    """
    settings = BaseSettings({
        'MAIL_FROM': '*****@*****.**',
        'MAIL_HOST': 'smtp.aliyun.com',
        'MAIL_PORT': '25',
        'MAIL_USER': '******',
        'MAIL_PASS': '******',
    })
    # Fixed: the original used Python-2-only `print` statements, which
    # are a SyntaxError on Python 3; print(...) works on both.
    print('start send email')
    mailer = MailSender.from_settings(settings=settings)
    mailer.send(to=["*****@*****.**"], subject="job spider end",
                body=reason)
    print('end send email')
def _map_keys(compdict):
    """Return *compdict* with every key run through ``convert``.

    Plain mappings become plain dicts; ``BaseSettings`` instances keep
    each key's recorded priority in the converted copy.
    """
    if not isinstance(compdict, BaseSettings):
        _check_components(compdict)
        return {convert(k): v for k, v in compdict.items()}
    mapped = BaseSettings()
    for key, value in compdict.items():
        priority = compdict.getpriority(key)
        converted = convert(key)
        # Two distinct keys converting to the same name at the same
        # priority is ambiguous — refuse rather than silently pick one.
        if mapped.getpriority(converted) == priority:
            raise ValueError('Some paths in {!r} convert to the same '
                             'object, please update your settings'
                             ''.format(list(compdict.keys())))
        mapped.set(converted, value, priority=priority)
    return mapped
def test_duplicate_components_in_basesettings(self):
    # Names that convert to the same key are deduplicated; the
    # higher-priority spelling decides the value and position.
    bs = BaseSettings({'one': 1, 'two': 2}, priority=0)
    bs.set('ONE', 4, priority=10)
    self.assertEqual(
        build_component_list(bs, convert=lambda x: x.lower()),
        ['two', 'one'])
    bs.set('one', bs['one'], priority=20)
    self.assertEqual(
        build_component_list(bs, convert=lambda x: x.lower()),
        ['one', 'two'])
    # Equal priorities make the conflict unresolvable.
    bs.set('ONE', bs['ONE'], priority=20)
    self.assertRaises(ValueError, build_component_list, bs,
                      convert=lambda x: x.lower())
def test_gcs(self):
    settings = BaseSettings({
        'S3PIPELINE_URL':
            'gs://my-bucket/{name}/{time}/items.{chunk:07d}.jl',
        'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
    })
    pipeline = S3Pipeline(settings, None)
    # A gs:// URL selects the GCS strategy; everything else stays at
    # the documented defaults for a plain ".jl" feed.
    self.assertEqual(pipeline.bucket_name, 'my-bucket')
    self.assertEqual(pipeline.object_key_template,
                     '{name}/{time}/items.{chunk:07d}.jl')
    self.assertEqual(pipeline.max_chunk_size, 100)
    self.assertFalse(pipeline.use_gzip)
    self.assertEqual(pipeline.max_wait_upload_time, 30)
    self.assertIsInstance(pipeline.strategy, GCSStrategy)
    self.assertEqual(pipeline.exporter_cls, JsonLinesItemExporter)
def spider_opened(self, spider):
    # Decide whether the Tor proxy middleware is active for this spider:
    # a per-spider attribute overrides the project-wide setting.
    # NOTE(review): the attribute looked up is uppercase
    # "TOR_PROXY_ENABLED" while the log message below mentions a
    # lowercase "tor_proxy_enabled" — confirm which name is intended.
    try:
        spider_attr = getattr(spider, "TOR_PROXY_ENABLED")
    except AttributeError:
        # No spider-level override: fall back to the crawler setting.
        if not spider.crawler.settings.getbool("TOR_PROXY_ENABLED"):
            self.enabled = False
            self.logger.info("Tor Proxy disabled (TOR_PROXY_ENABLED setting)")
            return
    else:
        # Route the attribute through BaseSettings.getbool so its value
        # is interpreted with the same truthiness rules as settings.
        if not BaseSettings({"enabled": spider_attr}).getbool("enabled"):
            self.enabled = False
            self.logger.info("Tor Proxy disabled (tor_proxy_enabled spider attribute)")
            return
    self.enabled = True
    self._read_settings(spider.crawler.settings)
    # _read_settings may disable the middleware; only log when enabled.
    if self.enabled:
        self.logger.info("Using Tor Proxy at %s", self.proxy_url)
def test_duplicate_components_in_basesettings(self):
    # The higher-priority spelling of a duplicated name wins.
    dupes = BaseSettings({"one": 1, "two": 2}, priority=0)
    dupes.set("ONE", 4, priority=10)
    self.assertEqual(
        build_component_list(dupes, convert=lambda x: x.lower()),
        ["two", "one"],
    )
    dupes.set("one", dupes["one"], priority=20)
    self.assertEqual(
        build_component_list(dupes, convert=lambda x: x.lower()),
        ["one", "two"],
    )
    # Equal priorities are ambiguous and must raise ValueError.
    dupes.set("ONE", dupes["ONE"], priority=20)
    self.assertRaises(ValueError, build_component_list, dupes,
                      convert=lambda x: x.lower())
def spider_opened(self, spider):
    # Decide whether Crawlera Fetch is active for this spider: the
    # per-spider "crawlera_fetch_enabled" attribute overrides the
    # project-wide CRAWLERA_FETCH_ENABLED setting.
    try:
        spider_attr = getattr(spider, "crawlera_fetch_enabled")
    except AttributeError:
        # No spider-level override: consult the crawler settings.
        if not spider.crawler.settings.getbool("CRAWLERA_FETCH_ENABLED"):
            self.enabled = False
            logger.info(
                "Crawlera Fetch disabled (CRAWLERA_FETCH_ENABLED setting)")
            return
    else:
        # Route the attribute through BaseSettings.getbool so its value
        # is interpreted with the same truthiness rules as settings.
        if not BaseSettings({"enabled": spider_attr}).getbool("enabled"):
            self.enabled = False
            logger.info(
                "Crawlera Fetch disabled (crawlera_fetch_enabled spider attribute)"
            )
            return
    self.enabled = True
    self._read_settings(spider)
    # _read_settings may disable the middleware; only log when enabled.
    # Only the first characters of the API key are logged, by design.
    if self.enabled:
        logger.info("Using Crawlera Fetch API at %s with apikey %s***"
                    % (self.url, self.apikey[:5]))
def setUp(self):
    # Each test starts from a fresh, empty settings container.
    self.settings = BaseSettings()
help="log file. if omitted stderr will be used") parser.add_option("--nolog", action="store_true", help="disable logging completely") parser.add_option("--profile", metavar="FILE", default=None, help="write python cProfile stats to FILE") parser.add_option("--pidfile", metavar="FILE", help="write process ID to FILE") parser.add_option("-s", "--set", action="append", default=[], metavar="NAME=VALUE", help="set/override setting (may be repeated)") arg = ['--logfile=test.txt', '--profile=test2.txt', '-sa=b'] opts, args = parser.parse_args(arg) print(args) print(opts) com = Command() com.settings = BaseSettings() sc = ScrapyCommand() com.process_options(args, opts) p1 = com.settings.attributes p2 = com.settings.get("LOG_FILE") cs = com.settings print(type(cs.attributes["LOG_FILE"]))
def test_getpriority(self):
    settings = BaseSettings({'key': 'value'}, priority=99)
    # Known keys report their stored priority; unknown keys report None.
    self.assertEqual(settings.getpriority('key'), 99)
    self.assertIsNone(settings.getpriority('nonexistentkey'))
def setUp(self):
    # Minimal fixture: only the pipeline URL is configured.
    self.settings = BaseSettings({
        'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
    })