def test_doc(self):
    """Check that ``--doc zip`` yields a default config with "zip" added to its documents."""
    command_line = "https://www.example.com --doc zip"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    # Expected: a fresh default config whose document collection also holds "zip".
    expected = CrawlerConfig()
    expected.documents.add("zip")

    # Compare attribute dictionaries rather than the objects themselves.
    actual_attrs = vars(actual)
    expected_attrs = vars(expected)
    self.assertEqual(actual_attrs, expected_attrs)
def test_doc_and_no_docs(self):
    """Check ``--find phone email social`` combined with ``--doc zip``.

    The expected config is a default ``CrawlerConfig`` whose ``documents``
    attribute is replaced by the single-element set ``{"zip"}``.
    """
    command_line = "https://www.example.com --find phone email social --doc zip"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    expected = CrawlerConfig()
    expected.documents = {"zip"}

    actual_attrs = vars(actual)
    expected_attrs = vars(expected)
    self.assertEqual(actual_attrs, expected_attrs)
def test_agent(self):
    """Check that ``-a g`` sets the config's agent to the GOOGLE enum value."""
    command_line = "https://www.example.com -a g"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    # Expected: defaults everywhere except the agent.
    expected = CrawlerConfig()
    google_agent = arachnid_enums.Agent.GOOGLE.value
    expected.agent = google_agent

    actual_attrs = vars(actual)
    expected_attrs = vars(expected)
    self.assertEqual(actual_attrs, expected_attrs)
def test_stealth(self):
    """Check that ``--stealth`` matches a default config after ``set_stealth()``."""
    command_line = "https://www.example.com --stealth"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    expected = CrawlerConfig()
    expected.set_stealth()

    actual_attrs = vars(actual)
    expected_attrs = vars(expected)
    self.assertEqual(actual_attrs, expected_attrs)
def test_stealth_modified(self):
    """Check ``-T none`` combined with ``--stealth``.

    The expected config is a stealth config whose ``default_delay`` is then
    overridden with the ``Delay.NONE`` enum value.
    """
    command_line = "https://www.example.com -T none --stealth"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    expected = CrawlerConfig()
    expected.set_stealth()
    # Applied after set_stealth() so the explicit delay is the final value.
    no_delay = arachnid_enums.Delay.NONE.value
    expected.default_delay = no_delay

    actual_attrs = vars(actual)
    expected_attrs = vars(expected)
    self.assertEqual(actual_attrs, expected_attrs)
def test_multiple(self):
    """Check ``--find email phone social docs``.

    The expected config is a default ``CrawlerConfig`` with the phone, email
    and social-media scrape flags all set to ``True``.
    """
    command_line = "https://www.example.com --find email phone social docs"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    expected = CrawlerConfig()
    enabled_flags = (
        "scrape_phone_number",
        "scrape_email",
        "scrape_social_media",
    )
    for flag_name in enabled_flags:
        setattr(expected, flag_name, True)

    actual_attrs = vars(actual)
    expected_attrs = vars(expected)
    self.assertEqual(actual_attrs, expected_attrs)
def test_phone_number(self):
    """Check ``--find phone``.

    The expected config has only the phone-number scrape flag enabled, the
    email and social-media flags disabled, and ``documents`` set to ``{}``.
    """
    command_line = "https://www.example.com --find phone"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    expected = CrawlerConfig()
    # NOTE(review): ``{}`` is an empty dict, not an empty set, while other
    # tests in this file treat ``documents`` as a set -- confirm this is the
    # value generate_crawler_config is really meant to produce.
    overrides = (
        ("scrape_phone_number", True),
        ("scrape_email", False),
        ("scrape_social_media", False),
        ("documents", {}),
    )
    for attr_name, attr_value in overrides:
        setattr(expected, attr_name, attr_value)

    actual_attrs = vars(actual)
    expected_attrs = vars(expected)
    self.assertEqual(actual_attrs, expected_attrs)
def test_delay(self):
    """Check that ``-T high`` sets the config's default delay to ``Delay.HIGH``.

    The expected config is a default ``CrawlerConfig`` whose ``default_delay``
    is the ``Delay.HIGH`` enum value.
    """
    cli = "https://www.example.com -T high"
    namespace = self.get_namespace(cli)
    c = generate_crawler_config(namespace)

    correct_output = CrawlerConfig()
    correct_output.default_delay = arachnid_enums.Delay.HIGH.value

    # The original built the expected config but never compared it, so the
    # test passed vacuously. Assert the same way the sibling tests do.
    self.assertEqual(vars(c), vars(correct_output))
def test_default(self):
    """Check that a bare URL with no options yields a default ``CrawlerConfig``."""
    command_line = "https://www.example.com"
    parsed_args = self.get_namespace(command_line)
    actual = generate_crawler_config(parsed_args)

    actual_attrs = vars(actual)
    expected_attrs = vars(CrawlerConfig())
    self.assertEqual(actual_attrs, expected_attrs)