Esempio n. 1
0
    def setUp(self):
        self.tmp_path = mkdtemp()
        settings = Settings()
        settings.SEEDS_SOURCE = 's3://some-bucket/seeds-folder'
        settings.SEEDS_AWS_ACCESS_KEY = 'access_key'
        settings.SEEDS_AWS_SECRET_ACCESS_KEY = 'secret_key'
        crawler = type('crawler', (object, ), {})
        crawler.settings = settings
        self.seed_path_1 = os.path.join(self.tmp_path, 'seeds1.txt')
        self.seed_path_2 = os.path.join(self.tmp_path, 'seeds2.txt')
        s1_content = """
https://www.example.com
https://www.scrapy.org
"""
        s2_content = """
https://www.dmoz.org
https://www.test.com
"""

        with open(self.seed_path_1, 'wb') as tmpl_file:
            tmpl_file.write(s1_content.encode('utf-8'))
        with open(self.seed_path_2, 'wb') as tmpl_file:
            tmpl_file.write(s2_content.encode('utf-8'))
        self.seed_loader = S3SeedLoader(crawler)
Esempio n. 2
0
    def setUp(self):
        self.tmp_path = mkdtemp()
        settings = Settings()
        settings.SEEDS_SOURCE = 's3://some-bucket/seeds-folder'
        settings.SEEDS_AWS_ACCESS_KEY = 'access_key'
        settings.SEEDS_AWS_SECRET_ACCESS_KEY = 'secret_key'
        crawler = type('crawler', (object,), {})
        crawler.settings = settings
        self.seed_path_1 = os.path.join(self.tmp_path, 'seeds1.txt')
        self.seed_path_2 = os.path.join(self.tmp_path, 'seeds2.txt')
        s1_content = """
https://www.example.com
https://www.scrapy.org
"""
        s2_content = """
https://www.dmoz.org
https://www.test.com
"""

        with open(self.seed_path_1, 'wb') as tmpl_file:
            tmpl_file.write(s1_content.encode('utf-8'))
        with open(self.seed_path_2, 'wb') as tmpl_file:
            tmpl_file.write(s2_content.encode('utf-8'))
        self.seed_loader = S3SeedLoader(crawler)