def test_crawl_native_fakeCrawler(self):
    """Run ``crawl_native`` on the "SimpleTest" crawler and check its output.

    The crawler is configured from ``./test/search_parameters.json``. After
    the crawl, the JSON document found at ``c.output["path"]`` is loaded and
    is expected to hold exactly three entries.
    """
    parameter_file = "./test/search_parameters.json"
    # Check the fixture before reading it: once it has been read, asserting
    # that it exists says nothing about the crawl.
    self.assertTrue(
        os.path.isfile(parameter_file),
        "missing parameter file: " + parameter_file,
    )

    c = Crawler("SimpleTest", parameters=FileOperations.get_from_JSON_file(parameter_file))
    self.assertEqual(c.name, "SimpleTest")

    c.crawl_native()

    # Fail with a clear message if no file is present at the output path,
    # instead of an opaque error when reading it back below.
    output_path = c.output["path"]
    self.assertTrue(
        os.path.isfile(output_path),
        "crawler output file not found: " + str(output_path),
    )

    result_from_file = FileOperations.get_from_JSON_file(output_path)
    self.assertEqual(len(result_from_file), 3)
def test_crawl_clientIntegrations(self):
    """Crawl with the "Integration" crawler and hand the result to be saved.

    The crawler's settings are the "Integration" entry of the ``crawlers``
    section in ``./test/search_integration.json``. The crawl must return a
    non-empty result, which is then passed to ``save_crawler_data`` together
    with the ``output`` entry of those settings.
    """
    crawler_name = "Integration"
    config = FileOperations.get_from_JSON_file("./test/search_integration.json")
    # Look the settings up once; they feed both the constructor and the save.
    settings = config["crawlers"][crawler_name]

    crawler = Crawler(crawler_name, parameters=settings)
    results = crawler.crawl_native()

    self.assertTrue(len(results) > 0)
    crawler.save_crawler_data(results, settings["output"])
def test_crawl_multithread_mmcoreAsync(self):
    """Crawl with the "dotAsync" crawler, passing ``threads=None``.

    The crawler's settings are the "dotAsync" entry of the ``crawlers``
    section in ``./test/search_async.json``. The crawl must return a
    non-empty result, which is then passed to ``save_crawler_data`` together
    with the ``output`` entry of those settings.
    """
    crawler_name = "dotAsync"
    config = FileOperations.get_from_JSON_file("./test/search_async.json")
    # Look the settings up once; they feed both the constructor and the save.
    settings = config["crawlers"][crawler_name]

    crawler = Crawler(crawler_name, parameters=settings)
    # NOTE(review): what threads=None selects is decided inside crawl_native,
    # which is not visible from this test.
    results = crawler.crawl_native(threads=None)

    self.assertTrue(len(results) > 0)
    crawler.save_crawler_data(results, settings["output"])
def test_crawl_native_minimalParameterFile_multithreaded(self):
    """Crawl with ``threads=10`` using the minimal parameter file.

    Builds "MyMinimalCrawler" from ``./test/minimal_parameters.json`` and
    checks that, for the input ``./test/test_inputs/minimalist_data.txt``,
    the first ``city`` value under the ``HasName`` match is "London".
    """
    parameters = FileOperations.get_from_JSON_file("./test/minimal_parameters.json")
    crawler = Crawler("MyMinimalCrawler", parameters)

    results = crawler.crawl_native(threads=10)

    # Walk the nested result one level at a time for readability.
    file_result = results['./test/test_inputs/minimalist_data.txt']
    cities = file_result['matches']['HasName']['city']
    self.assertEqual(cities[0], 'London')