Esempio n. 1
0
    def seed_loader_setup(self, seeds_content=None):
        seed_path = os.path.join(self.tmp_path, 'seeds.txt')
        default_content = """
https://www.example.com
https://www.scrapy.org
"""
        seeds_content = seeds_content or default_content
        with open(seed_path, 'wb') as tmpl_file:
            tmpl_file.write(seeds_content.encode('utf-8'))
        assert os.path.isfile(seed_path)  # Failure of test itself
        settings = Settings()
        settings.SEEDS_SOURCE = seed_path
        crawler = type('crawler', (object, ), {})
        crawler.settings = settings
        return FileSeedLoader(crawler)
Esempio n. 2
0
    def seed_loader_setup(self, seeds_content=None):
        seed_path = os.path.join(self.tmp_path, 'seeds.txt')
        default_content = """
https://www.example.com
https://www.scrapy.org
"""
        seeds_content = seeds_content or default_content
        with open(seed_path, 'wb') as tmpl_file:
            tmpl_file.write(seeds_content.encode('utf-8'))
        assert os.path.isfile(seed_path)  # Failure of test itself
        settings = Settings()
        settings.SEEDS_SOURCE = seed_path
        crawler = type('crawler', (object,), {})
        crawler.settings = settings
        return FileSeedLoader(crawler)
Esempio n. 3
0
    def setUp(self):
        self.tmp_path = mkdtemp()
        settings = Settings()
        settings.SEEDS_SOURCE = 's3://some-bucket/seeds-folder'
        settings.SEEDS_AWS_ACCESS_KEY = 'access_key'
        settings.SEEDS_AWS_SECRET_ACCESS_KEY = 'secret_key'
        crawler = type('crawler', (object, ), {})
        crawler.settings = settings
        self.seed_path_1 = os.path.join(self.tmp_path, 'seeds1.txt')
        self.seed_path_2 = os.path.join(self.tmp_path, 'seeds2.txt')
        s1_content = """
https://www.example.com
https://www.scrapy.org
"""
        s2_content = """
https://www.dmoz.org
https://www.test.com
"""

        with open(self.seed_path_1, 'wb') as tmpl_file:
            tmpl_file.write(s1_content.encode('utf-8'))
        with open(self.seed_path_2, 'wb') as tmpl_file:
            tmpl_file.write(s2_content.encode('utf-8'))
        self.seed_loader = S3SeedLoader(crawler)
Esempio n. 4
0
    def setUp(self):
        self.tmp_path = mkdtemp()
        settings = Settings()
        settings.SEEDS_SOURCE = 's3://some-bucket/seeds-folder'
        settings.SEEDS_AWS_ACCESS_KEY = 'access_key'
        settings.SEEDS_AWS_SECRET_ACCESS_KEY = 'secret_key'
        crawler = type('crawler', (object,), {})
        crawler.settings = settings
        self.seed_path_1 = os.path.join(self.tmp_path, 'seeds1.txt')
        self.seed_path_2 = os.path.join(self.tmp_path, 'seeds2.txt')
        s1_content = """
https://www.example.com
https://www.scrapy.org
"""
        s2_content = """
https://www.dmoz.org
https://www.test.com
"""

        with open(self.seed_path_1, 'wb') as tmpl_file:
            tmpl_file.write(s1_content.encode('utf-8'))
        with open(self.seed_path_2, 'wb') as tmpl_file:
            tmpl_file.write(s2_content.encode('utf-8'))
        self.seed_loader = S3SeedLoader(crawler)
Esempio n. 5
0
 def test_invalid_s3_seed_source(self):
     """A SEEDS_SOURCE that is not an s3:// URL must raise NotConfigured."""
     settings = Settings()
     settings.SEEDS_SOURCE = 'invalid_url'
     crawler = type('crawler', (object,), {'settings': settings})
     with self.assertRaises(NotConfigured):
         S3SeedLoader(crawler)
Esempio n. 6
0
 def test_invalid_s3_seed_source(self):
     """Constructing S3SeedLoader with a non-s3 source raises NotConfigured."""
     bad_settings = Settings()
     bad_settings.SEEDS_SOURCE = 'invalid_url'
     crawler = type('crawler', (object,), {})
     crawler.settings = bad_settings
     self.assertRaises(NotConfigured, S3SeedLoader, crawler)