def seed_loader_setup(self, seeds_content=None):
    """Write a seeds file into the temp dir and return a FileSeedLoader over it.

    If *seeds_content* is falsy, a default two-URL seed list is used.
    """
    seed_path = os.path.join(self.tmp_path, 'seeds.txt')
    if not seeds_content:
        # NOTE: newlines reconstructed from the original triple-quoted literal.
        seeds_content = """
https://www.example.com
https://www.scrapy.org
"""
    with open(seed_path, 'wb') as tmpl_file:
        tmpl_file.write(seeds_content.encode('utf-8'))
    assert os.path.isfile(seed_path)  # Failure of test itself
    settings = Settings()
    settings.SEEDS_SOURCE = seed_path
    # Minimal stand-in crawler: only a .settings attribute is required.
    crawler = type('crawler', (object,), {'settings': settings})
    return FileSeedLoader(crawler)
def seed_loader_setup(self, seeds_content=None):
    """Build a FileSeedLoader reading from a seeds.txt created under tmp_path.

    Falls back to a default seed list when *seeds_content* is empty or None.
    """
    default_seeds = """
https://www.example.com
https://www.scrapy.org
"""
    content = seeds_content or default_seeds
    target = os.path.join(self.tmp_path, 'seeds.txt')
    payload = content.encode('utf-8')
    with open(target, 'wb') as out:
        out.write(payload)
    assert os.path.isfile(target)  # Failure of test itself
    loader_settings = Settings()
    loader_settings.SEEDS_SOURCE = target
    # Throwaway class acting as a crawler; the loader only reads .settings.
    fake_crawler = type('crawler', (object,), {})
    fake_crawler.settings = loader_settings
    return FileSeedLoader(fake_crawler)
def setUp(self):
    """Create two local seed files and an S3SeedLoader configured for S3.

    The loader is pointed at a fake s3:// bucket; the on-disk files are
    fixtures for tests that stub the S3 download.
    """
    self.tmp_path = mkdtemp()
    settings = Settings()
    settings.SEEDS_SOURCE = 's3://some-bucket/seeds-folder'
    settings.SEEDS_AWS_ACCESS_KEY = 'access_key'
    settings.SEEDS_AWS_SECRET_ACCESS_KEY = 'secret_key'
    crawler = type('crawler', (object,), {'settings': settings})
    self.seed_path_1 = os.path.join(self.tmp_path, 'seeds1.txt')
    self.seed_path_2 = os.path.join(self.tmp_path, 'seeds2.txt')
    # Path -> content mapping; newlines reconstructed from the original
    # triple-quoted literals.
    fixtures = {
        self.seed_path_1: "\nhttps://www.example.com\nhttps://www.scrapy.org\n",
        self.seed_path_2: "\nhttps://www.dmoz.org\nhttps://www.test.com\n",
    }
    for path, text in fixtures.items():
        with open(path, 'wb') as fh:
            fh.write(text.encode('utf-8'))
    self.seed_loader = S3SeedLoader(crawler)
def setUp(self):
    """Prepare an S3SeedLoader plus two seed-file fixtures in a temp dir."""
    self.tmp_path = mkdtemp()

    s3_settings = Settings()
    s3_settings.SEEDS_SOURCE = 's3://some-bucket/seeds-folder'
    s3_settings.SEEDS_AWS_ACCESS_KEY = 'access_key'
    s3_settings.SEEDS_AWS_SECRET_ACCESS_KEY = 'secret_key'

    # Dummy crawler object — S3SeedLoader only inspects .settings.
    stub_crawler = type('crawler', (object,), {})
    stub_crawler.settings = s3_settings

    self.seed_path_1 = os.path.join(self.tmp_path, 'seeds1.txt')
    self.seed_path_2 = os.path.join(self.tmp_path, 'seeds2.txt')

    first_batch = "\nhttps://www.example.com\nhttps://www.scrapy.org\n"
    second_batch = "\nhttps://www.dmoz.org\nhttps://www.test.com\n"
    with open(self.seed_path_1, 'wb') as fh:
        fh.write(first_batch.encode('utf-8'))
    with open(self.seed_path_2, 'wb') as fh:
        fh.write(second_batch.encode('utf-8'))

    self.seed_loader = S3SeedLoader(stub_crawler)
def test_invalid_s3_seed_source(self):
    """S3SeedLoader must raise NotConfigured for a non-s3:// SEEDS_SOURCE."""
    bad_settings = Settings()
    bad_settings.SEEDS_SOURCE = 'invalid_url'
    stub_crawler = type('crawler', (object,), {'settings': bad_settings})
    with self.assertRaises(NotConfigured):
        S3SeedLoader(stub_crawler)
def test_invalid_s3_seed_source(self):
    """Constructing S3SeedLoader with a malformed source URL is rejected."""
    cfg = Settings()
    cfg.SEEDS_SOURCE = 'invalid_url'
    # Bare object standing in for a crawler; only .settings is consulted.
    mock_crawler = type('crawler', (object,), {})
    mock_crawler.settings = cfg
    self.assertRaises(NotConfigured, S3SeedLoader, mock_crawler)