Ejemplo n.º 1
0
 def test_getcomposite(self):
     """Exercise ``_getcomposite`` for a setting with a ``_BASE``
     counterpart, one without, and a name that does not exist."""
     settings = BaseSettings({
         'TEST_BASE': {
             1: 1,
             2: 2,
         },
         'TEST': BaseSettings({1: 10, 3: 30}, 'default'),
         'HASNOBASE': BaseSettings({1: 1}, 'default'),
     })
     settings['TEST'].set(4, 4, priority='project')

     # When users specify a _BASE setting they explicitly don't want to use
     # Scrapy's defaults, so we don't want to see anything that has a
     # 'default' priority from TEST
     with_base = settings._getcomposite('TEST')
     self.assertEqual(len(with_base), 3)
     for key, expected in ((1, 1), (2, 2), (4, 4)):
         self.assertEqual(with_base[key], expected)

     # No _BASE counterpart: only the setting's own single entry is expected.
     without_base = settings._getcomposite('HASNOBASE')
     self.assertEqual(len(without_base), 1)
     self.assertEqual(without_base[1], 1)

     # Unknown names yield None.
     self.assertIsNone(settings._getcomposite('NONEXISTENT'))
 def test_copy_to_dict(self):
     """Check that ``copy_to_dict`` matches the expected nested mapping,
     including the values held in nested ``BaseSettings`` objects."""
     settings = BaseSettings({
         'TEST_STRING': 'a string',
         'TEST_LIST': [1, 2],
         'TEST_BOOLEAN': False,
         'TEST_BASE': BaseSettings({1: 1, 2: 2}, 'project'),
         'TEST': BaseSettings({1: 10, 3: 30}, 'default'),
         'HASNOBASE': BaseSettings({3: 3000}, 'default'),
     })
     expected = {
         'HASNOBASE': {3: 3000},
         'TEST': {1: 10, 3: 30},
         'TEST_BASE': {1: 1, 2: 2},
         'TEST_BOOLEAN': False,
         'TEST_LIST': [1, 2],
         'TEST_STRING': 'a string',
     }
     self.assertDictEqual(settings.copy_to_dict(), expected)
Ejemplo n.º 3
0
    def test_update(self):
        """Check how ``update`` interacts with priorities for both plain
        dicts and ``BaseSettings`` arguments."""
        target = BaseSettings({'key_lowprio': 0}, priority=0)
        target.set('key_highprio', 10, priority=50)

        override_settings = BaseSettings(
            {'key_lowprio': 1, 'key_highprio': 11}, priority=30)
        override_settings.set('newkey_one', None, priority=50)
        override_dict = {
            'key_lowprio': 2,
            'key_highprio': 12,
            'newkey_two': None,
        }

        # Plain dict applied at priority 20: the priority-0 key is replaced,
        # the priority-50 key keeps its value, the new key is added.
        target.update(override_dict, priority=20)
        self.assertEqual(target['key_lowprio'], 2)
        self.assertEqual(target.getpriority('key_lowprio'), 20)
        self.assertEqual(target['key_highprio'], 10)
        self.assertIn('newkey_two', target)
        self.assertEqual(target.getpriority('newkey_two'), 20)

        # BaseSettings argument: the asserted priorities (30 and 50) are the
        # ones carried by the argument's own entries.
        target.update(override_settings)
        self.assertEqual(target['key_lowprio'], 1)
        self.assertEqual(target.getpriority('key_lowprio'), 30)
        self.assertEqual(target['key_highprio'], 10)
        self.assertIn('newkey_one', target)
        self.assertEqual(target.getpriority('newkey_one'), 50)

        # A later update at priority 20 must not replace the priority-30 value.
        target.update({'key_lowprio': 3}, priority=20)
        self.assertEqual(target['key_lowprio'], 1)
Ejemplo n.º 4
0
    def from_crawler(cls, crawler: Crawler):
        """Build the spider configuration from *crawler* and return ``cls()``.

        Steps, in order:

        1. ``cls.normalize`` is applied to the crawler settings.
        2. A fresh settings object is filled from
           ``crawler.spidercls.SpiderConfig`` via ``cls.from_object``.
        3. Crawler settings whose keys already exist in that object are
           applied on top with ``'cmdline'`` priority.
        4. If a truthy ``PRESET`` setting exists, it is passed to
           ``cls.from_pyfile`` and the result is merged in.
        5. Each entry is passed through the ``SettingsAdapter`` attribute
           named after the lower-cased key when one exists; otherwise it is
           copied unchanged.
        6. The adapted values are written back into the crawler settings at
           priority 50 and also stored under ``SPIDER_CONFIG``.

        :param crawler: crawler providing ``settings`` and ``spidercls``
        :return: a new instance of ``cls``
        """
        crawler_settings: BaseSettings = crawler.settings
        cls.normalize(crawler_settings)

        spider_settings = BaseSettings(priority='spider')
        cls.from_object(spider_settings, crawler.spidercls.SpiderConfig)

        # Only keys already declared by the spider config are taken over.
        overrides = {}
        for key, value in crawler_settings.items():
            if key in spider_settings:
                overrides[key] = value
        spider_settings.update(overrides, priority='cmdline')

        preset = crawler_settings.get('PRESET')
        if preset:
            preset_settings = BaseSettings(priority=35)
            cls.from_pyfile(preset_settings, preset)
            spider_settings.update(preset_settings)

        adapted = BaseSettings(priority=50)
        for key, value in spider_settings.items():
            adapter = getattr(SettingsAdapter, key.lower(), None)
            if not adapter:
                # No adapter for this key: keep the value as-is.
                adapted[key] = value
                continue
            adapted.update(adapter(value))

        crawler_settings.update(adapted.copy_to_dict(), priority=50)
        crawler_settings['SPIDER_CONFIG'] = adapted
        return cls()
Ejemplo n.º 5
0
 def test_update_jsonstring(self):
     """Check that JSON strings are accepted by ``update`` and by ``set``
     on a nested ``BaseSettings`` value."""
     nested = BaseSettings({'key': 'val'})
     settings = BaseSettings({'number': 0, 'dict': nested})

     settings.update('{"number": 1, "newnumber": 2}')
     for key, expected in (('number', 1), ('newnumber', 2)):
         self.assertEqual(settings[key], expected)

     settings.set("dict", '{"key": "newval", "newkey": "newval2"}')
     updated = settings['dict']
     self.assertEqual(updated['key'], "newval")
     self.assertEqual(settings['dict']['newkey'], "newval2")
Ejemplo n.º 6
0
 def test_getwithbase(self):
     """Check ``getwithbase`` for a setting with a ``_BASE`` counterpart,
     one without, and a name that does not exist."""
     settings = BaseSettings({
         'TEST_BASE': BaseSettings({1: 1, 2: 2}, 'project'),
         'TEST': BaseSettings({1: 10, 3: 30}, 'default'),
         'HASNOBASE': BaseSettings({3: 3000}, 'default'),
     })
     settings['TEST'].set(2, 200, 'cmdline')

     expected_with_base = {1: 1, 2: 200, 3: 30}
     six.assertCountEqual(self, settings.getwithbase('TEST'),
                          expected_with_base)
     six.assertCountEqual(self, settings.getwithbase('HASNOBASE'),
                          settings['HASNOBASE'])
     # An unknown name compares equal to an empty dict.
     self.assertEqual(settings.getwithbase('NONEXISTENT'), {})
Ejemplo n.º 7
0
 def test_repr(self):
     """Check ``repr`` of an empty settings object and of one holding a
     single ``SettingsAttribute``."""
     settings = BaseSettings()
     self.assertEqual(repr(settings), "<BaseSettings {}>")

     attribute = SettingsAttribute('testval', 15)
     settings['testkey'] = attribute
     expected = "<BaseSettings {'testkey': %s}>" % repr(attribute)
     self.assertEqual(repr(settings), expected)
Ejemplo n.º 8
0
    def test_overwrite_basesettings(self):
        """Check that setting a ``SettingsAttribute`` holding a
        ``BaseSettings`` replaces its value only at sufficient priority and
        leaves the original settings object untouched."""
        initial_values = {'one': 10, 'two': 20}
        initial_settings = BaseSettings(initial_values, 0)
        attribute = SettingsAttribute(initial_settings, 0)

        replacement_values = {'three': 11, 'four': 21}
        attribute.set(replacement_values, 10)
        self.assertIsInstance(attribute.value, BaseSettings)
        six.assertCountEqual(self, attribute.value, replacement_values)
        six.assertCountEqual(self, initial_settings, initial_values)

        replacement_settings = BaseSettings({'five': 12}, 0)
        attribute.set(replacement_settings, 0)  # Insufficient priority
        six.assertCountEqual(self, attribute.value, replacement_values)
        attribute.set(replacement_settings, 10)
        six.assertCountEqual(self, attribute.value, replacement_settings)
Ejemplo n.º 9
0
    def __init__(self, values=None, priority='project'):
        """Initialise the settings from ``aio_settings`` and then apply
        *values*.

        :param values: settings to apply on top of the module defaults
        :param priority: priority used when applying *values*
        """
        super().__init__()
        self.setmodule(aio_settings, 'default')

        # Re-store every dict-valued default as a BaseSettings object.
        dict_defaults = [(name, val) for name, val in self.items()
                         if isinstance(val, dict)]
        for name, val in dict_defaults:
            self.set(name, BaseSettings(val, 'default'), 'default')

        self.update(values, priority)
Ejemplo n.º 10
0
    def test_set_per_key_priorities(self):
        """Check that setting a ``BaseSettings`` value onto an attribute
        that holds a ``BaseSettings`` is resolved per key by priority."""
        initial = BaseSettings({'one': 10, 'two': 20}, 0)
        attribute = SettingsAttribute(initial, 0)

        attribute.set({'one': 11, 'two': 21}, 10)
        self.assertEqual(attribute.value['one'], 11)
        self.assertEqual(attribute.value['two'], 21)

        # 'one' arrives at priority 20 and is expected to win over 10;
        # 'two' arrives at priority 0 and is expected to be ignored.
        incoming = BaseSettings()
        incoming.set('one', 12, 20)
        incoming.set('two', 12, 0)
        attribute.set(incoming, 0)
        self.assertEqual(attribute.value['one'], 12)
        self.assertEqual(attribute.value['two'], 21)
Ejemplo n.º 11
0
 def test_delete(self):
     """Check that ``delete`` removes ``'key'`` but leaves the entry set at
     priority 50 in place, while ``del`` removes that entry too."""
     settings = BaseSettings({'key': None})
     settings.set('key_highprio', None, priority=50)

     for name in ('key', 'key_highprio'):
         settings.delete(name)
     self.assertNotIn('key', settings)
     self.assertIn('key_highprio', settings)

     del settings['key_highprio']
     self.assertNotIn('key_highprio', settings)
 def setUp(self):
     """Create the settings fixture shared by the tests in this case."""
     # NOTE(review): 'KENISISPARTITION_KEY' is spelled differently from
     # 'KINESISSTREAM_NAME'; confirm which spelling the consuming code
     # reads before changing it.
     fixture_values = {
         'S3PIPELINE_URL':
         's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
         'KINESISSTREAM_NAME': 'kinesis-stream',
         'KENISISPARTITION_KEY': 'kinesis-partition-key',
     }
     self.settings = BaseSettings(fixture_values)
Ejemplo n.º 13
0
def test_update_settings_per_key_priorities_new_behaviour():
    """Check that successive updates to a nested ``ITEM_PIPELINES``
    ``BaseSettings`` accumulate entries."""
    from scrapy.settings import BaseSettings

    entrypoint_settings = EntrypointSettings()
    entrypoint_settings.set('ITEM_PIPELINES', BaseSettings())
    for path, order in (('test.path1', 100), ('test.path2', 200)):
        entrypoint_settings['ITEM_PIPELINES'].update({path: order})

    expected = {
        'test.path1': 100,
        'test.path2': 200,
    }
    assert dict(entrypoint_settings['ITEM_PIPELINES']) == expected
    def test_json_gz(self):
        """A ``.json.gz`` URL should enable gzip and select the JSON
        exporter."""
        pipeline = S3Pipeline(
            BaseSettings({
                'S3PIPELINE_URL':
                's3://my-bucket/{name}/{time}/items.{chunk:07d}.json.gz',
                'FEED_EXPORTERS_BASE':
                default_settings.FEED_EXPORTERS_BASE,
            }),
            None,
        )

        self.assertTrue(pipeline.use_gzip)
        self.assertEqual(pipeline.exporter_cls, JsonItemExporter)
Ejemplo n.º 15
0
 def test_setitem(self):
     """Check that item assignment stores values at priority 20, both when
     replacing a ``'default'``-priority value and when adding a new key."""
     settings = BaseSettings()
     settings.set('key', 'a', 'default')

     # Assignment replaces the 'default'-priority value.
     settings['key'] = 'b'
     self.assertEqual(settings['key'], 'b')
     self.assertEqual(settings.getpriority('key'), 20)

     # A second assignment replaces the previously assigned value.
     settings['key'] = 'c'
     self.assertEqual(settings['key'], 'c')

     # Assigning an unknown key creates it.
     settings['key2'] = 'x'
     self.assertIn('key2', settings)
     self.assertEqual(settings['key2'], 'x')
     self.assertEqual(settings.getpriority('key2'), 20)
Ejemplo n.º 16
0
    def getwithbase(self, name):
        """Compose a dictionary-like setting with its suffixed counterparts.

        A fresh ``BaseSettings`` is updated, in this order, from
        ``name + '_BASE'``, ``name`` itself and ``name + '_USER'``.

        :param name: name of the dictionary-like setting
        :type name: str
        """
        merged = BaseSettings()
        for suffix in ('_BASE', '', '_USER'):
            merged.update(self[name + suffix])
        return merged
    def test_max_chunk_size(self):
        """``S3PIPELINE_MAX_CHUNK_SIZE`` should be exposed as
        ``pipeline.max_chunk_size``."""
        raw_settings = {
            'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
            'S3PIPELINE_MAX_CHUNK_SIZE': 1000,
            'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
        }

        pipeline = S3Pipeline(BaseSettings(raw_settings), None)
        self.assertEqual(pipeline.max_chunk_size, 1000)
    def test_force_no_gzip(self):
        """``S3PIPELINE_GZIP = False`` should disable gzip even though the
        URL ends in ``.gz``."""
        raw_settings = {
            'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
            'S3PIPELINE_GZIP': False,
            'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
        }

        pipeline = S3Pipeline(BaseSettings(raw_settings), None)
        self.assertFalse(pipeline.use_gzip)
    def test_max_wait_upload_time(self):
        """``S3PIPELINE_MAX_WAIT_UPLOAD_TIME`` should be exposed as
        ``pipeline.max_wait_upload_time``."""
        raw_settings = {
            'S3PIPELINE_URL':
            's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz',
            'S3PIPELINE_MAX_WAIT_UPLOAD_TIME': 300,
            'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
        }

        pipeline = S3Pipeline(BaseSettings(raw_settings), None)
        self.assertEqual(pipeline.max_wait_upload_time, 300)
Ejemplo n.º 20
0
 def close(spider, reason):
     """Send a notification e-mail when the spider finishes.

     :param spider: the spider being closed (not used in the body)
     :param reason: close reason, sent as the body of the e-mail
     """
     settings = BaseSettings({
         'MAIL_FROM': '*****@*****.**',
         'MAIL_HOST': 'smtp.aliyun.com',
         'MAIL_PORT': '25',
         'MAIL_USER': '******',
         'MAIL_PASS': '******',
     })
     # Call form of print: the statement form is a SyntaxError on Python 3,
     # while a single parenthesised argument prints the same on Python 2.
     print('start send email')
     mailer = MailSender.from_settings(settings=settings)
     mailer.send(to=["*****@*****.**"],
                 subject="job spider end",
                 body=reason)
     print('end send email')
Ejemplo n.º 21
0
 def _map_keys(compdict):
     """Return a copy of *compdict* whose keys were passed through
     ``convert``.

     For a ``BaseSettings`` input the per-key priorities are kept, and the
     result is a new ``BaseSettings``. For any other mapping the input is
     first validated with ``_check_components`` and a plain dict is
     returned.

     :param compdict: ``BaseSettings`` or dict-like mapping of components
     :raises ValueError: if two keys of a ``BaseSettings`` input convert to
         the same key and carry the same priority
     """
     if isinstance(compdict, BaseSettings):
         compbs = BaseSettings()
         for k, v in compdict.items():
             prio = compdict.getpriority(k)
             # Convert once and reuse the result for both the collision
             # check and the insertion.
             converted = convert(k)
             if compbs.getpriority(converted) == prio:
                 raise ValueError('Some paths in {!r} convert to the same '
                                  'object, please update your settings'
                                  ''.format(list(compdict.keys())))
             compbs.set(converted, v, priority=prio)
         return compbs

     _check_components(compdict)
     return {convert(k): v for k, v in compdict.items()}
Ejemplo n.º 22
0
 def test_duplicate_components_in_basesettings(self):
     """Check how ``build_component_list`` resolves keys that convert to
     the same name, depending on their priorities."""

     def _lower(name):
         return name.lower()

     # Higher priority takes precedence
     duplicate_bs = BaseSettings({'one': 1, 'two': 2}, priority=0)
     duplicate_bs.set('ONE', 4, priority=10)
     components = build_component_list(duplicate_bs, convert=_lower)
     self.assertEqual(components, ['two', 'one'])

     duplicate_bs.set('one', duplicate_bs['one'], priority=20)
     components = build_component_list(duplicate_bs, convert=_lower)
     self.assertEqual(components, ['one', 'two'])

     # Same priority raises ValueError
     duplicate_bs.set('ONE', duplicate_bs['ONE'], priority=20)
     with self.assertRaises(ValueError):
         build_component_list(duplicate_bs, convert=_lower)
    def test_gcs(self):
        """A ``gs://`` URL should select the GCS strategy; the remaining
        attributes are checked against the values asserted below."""
        raw_settings = {
            'S3PIPELINE_URL':
            'gs://my-bucket/{name}/{time}/items.{chunk:07d}.jl',
            'FEED_EXPORTERS_BASE': default_settings.FEED_EXPORTERS_BASE,
        }
        pipeline = S3Pipeline(BaseSettings(raw_settings), None)

        # Values parsed from the URL, plus the chunk size expected when
        # S3PIPELINE_MAX_CHUNK_SIZE is not given.
        self.assertEqual(pipeline.bucket_name, 'my-bucket')
        self.assertEqual(pipeline.object_key_template,
                         '{name}/{time}/items.{chunk:07d}.jl')
        self.assertEqual(pipeline.max_chunk_size, 100)

        self.assertFalse(pipeline.use_gzip)
        self.assertEqual(pipeline.max_wait_upload_time, 30)
        self.assertIsInstance(pipeline.strategy, GCSStrategy)
        self.assertEqual(pipeline.exporter_cls, JsonLinesItemExporter)
 def spider_opened(self, spider):
     """Decide whether the Tor proxy is enabled for *spider*.

     A ``TOR_PROXY_ENABLED`` attribute on the spider takes precedence; when
     the spider has no such attribute, the ``TOR_PROXY_ENABLED`` crawler
     setting is used. If enabled, the crawler settings are passed to
     ``_read_settings``.
     """
     missing = object()
     spider_attr = getattr(spider, "TOR_PROXY_ENABLED", missing)

     if spider_attr is missing:
         # No spider-level override: fall back to the crawler setting.
         if not spider.crawler.settings.getbool("TOR_PROXY_ENABLED"):
             self.enabled = False
             self.logger.info("Tor Proxy disabled (TOR_PROXY_ENABLED setting)")
             return
     elif not BaseSettings({"enabled": spider_attr}).getbool("enabled"):
         # The attribute is run through getbool via a throwaway settings
         # object, the same conversion applied to the crawler setting.
         self.enabled = False
         self.logger.info("Tor Proxy disabled (tor_proxy_enabled spider attribute)")
         return

     self.enabled = True
     self._read_settings(spider.crawler.settings)
     # Re-checked because self.enabled is tested again after
     # _read_settings runs.
     if self.enabled:
         self.logger.info("Using Tor Proxy at %s", self.proxy_url)
Ejemplo n.º 25
0
 def test_duplicate_components_in_basesettings(self):
     """Check how ``build_component_list`` resolves keys that convert to
     the same name, depending on their priorities."""

     def _lower(name):
         return name.lower()

     def _build():
         return build_component_list(duplicate_bs, convert=_lower)

     # Higher priority takes precedence
     duplicate_bs = BaseSettings({"one": 1, "two": 2}, priority=0)
     duplicate_bs.set("ONE", 4, priority=10)
     self.assertEqual(_build(), ["two", "one"])

     duplicate_bs.set("one", duplicate_bs["one"], priority=20)
     self.assertEqual(_build(), ["one", "two"])

     # Same priority raises ValueError
     duplicate_bs.set("ONE", duplicate_bs["ONE"], priority=20)
     with self.assertRaises(ValueError):
         _build()
Ejemplo n.º 26
0
    def spider_opened(self, spider):
        """Decide whether Crawlera Fetch is enabled for *spider*.

        A ``crawlera_fetch_enabled`` attribute on the spider takes
        precedence; when the spider has no such attribute, the
        ``CRAWLERA_FETCH_ENABLED`` crawler setting is used. If enabled, the
        spider is passed to ``_read_settings``.
        """
        missing = object()
        spider_attr = getattr(spider, "crawlera_fetch_enabled", missing)

        if spider_attr is missing:
            # No spider-level override: fall back to the crawler setting.
            if not spider.crawler.settings.getbool("CRAWLERA_FETCH_ENABLED"):
                self.enabled = False
                logger.info(
                    "Crawlera Fetch disabled (CRAWLERA_FETCH_ENABLED setting)")
                return
        elif not BaseSettings({"enabled": spider_attr}).getbool("enabled"):
            # The attribute is run through getbool via a throwaway settings
            # object, the same conversion applied to the crawler setting.
            self.enabled = False
            logger.info(
                "Crawlera Fetch disabled (crawlera_fetch_enabled spider attribute)"
            )
            return

        self.enabled = True
        self._read_settings(spider)
        # Re-checked because self.enabled is tested again after
        # _read_settings runs.
        if self.enabled:
            # Only the first five characters of the API key are logged.
            key_prefix = self.apikey[:5]
            logger.info("Using Crawlera Fetch API at %s with apikey %s***" %
                        (self.url, key_prefix))
Ejemplo n.º 27
0
 def setUp(self):
     """Give each test a fresh, empty ``BaseSettings`` instance."""
     self.settings = BaseSettings()
Ejemplo n.º 28
0
                  help="log file. if omitted stderr will be used")
# Exploratory script: registers a few command-line options on `parser`
# (created outside this excerpt), parses a sample argument list and feeds the
# result to a Command's process_options to inspect the resulting settings.
parser.add_option("--nolog",
                  action="store_true",
                  help="disable logging completely")

parser.add_option("--profile",
                  metavar="FILE",
                  default=None,
                  help="write python cProfile stats to FILE")
parser.add_option("--pidfile", metavar="FILE", help="write process ID to FILE")
# "-s NAME=VALUE" may be repeated; each occurrence is appended to a list.
parser.add_option("-s",
                  "--set",
                  action="append",
                  default=[],
                  metavar="NAME=VALUE",
                  help="set/override setting (may be repeated)")

# Sample command line used for the experiment.
arg = ['--logfile=test.txt', '--profile=test2.txt', '-sa=b']
opts, args = parser.parse_args(arg)

print(args)
print(opts)
com = Command()
# Start from empty settings so whatever process_options stores is visible.
com.settings = BaseSettings()
# NOTE(review): `sc` is never used below.
sc = ScrapyCommand()
com.process_options(args, opts)
# NOTE(review): `p1` and `p2` are assigned but never read; presumably kept
# for inspection in a debugger.
p1 = com.settings.attributes
p2 = com.settings.get("LOG_FILE")
cs = com.settings
# Show the type of the object stored for LOG_FILE in the attributes mapping.
print(type(cs.attributes["LOG_FILE"]))
Ejemplo n.º 29
0
 def test_getpriority(self):
     """Check that ``getpriority`` returns the stored priority for a known
     key and ``None`` for an unknown one."""
     settings = BaseSettings({'key': 'value'}, priority=99)
     self.assertEqual(settings.getpriority('key'), 99)
     # Identity check: assertEqual(..., None) would also accept any object
     # that merely compares equal to None.
     self.assertIsNone(settings.getpriority('nonexistentkey'))
Ejemplo n.º 30
0
 def setUp(self):
     """Create the settings fixture shared by the tests in this case."""
     pipeline_url = 's3://my-bucket/{name}/{time}/items.{chunk:07d}.jl.gz'
     self.settings = BaseSettings({'S3PIPELINE_URL': pipeline_url})