def _run_test(self, extr, url, result):
    """Run a TestJob for `url` and compare its output against `result`.

    `extr` is the extractor class the job is expected to select for `url`.
    `result` may be falsy, in which case only the extractor class is
    checked.  Otherwise the following keys are honoured when present:

    - "options":   iterable of (dotted-key, value) pairs passed to
                   config.set() before the job is created
    - "exception": exception type that tjob.run() is expected to raise
    - "url", "keyword", "content": expected hex digests of the job's
                   corresponding hash objects
    - "count":     expected number of entries in tjob.urllist
    - "pattern":   regex every entry of tjob.urllist has to match
    """
    want_content = False
    if result:
        if "options" in result:
            for option, value in result["options"]:
                config.set(option.split("."), value)
        # content is only requested when a content digest is expected
        want_content = "content" in result

    tjob = job.TestJob(url, content=want_content)
    self.assertEqual(extr, tjob.extractor.__class__)

    if not result:
        return

    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return

    tjob.run()

    # digest checks, in the same order as they are reported on failure
    digest_checks = (
        ("url", "hash_url"),
        ("keyword", "hash_keyword"),
        ("content", "hash_content"),
    )
    for key, attr in digest_checks:
        if key in result:
            self.assertEqual(result[key], getattr(tjob, attr).hexdigest())

    if "count" in result:
        self.assertEqual(len(tjob.urllist), int(result["count"]))

    if "pattern" in result:
        for found in tjob.urllist:
            self.assertRegex(found, result["pattern"])
def _run_test(self, extr, url, result):
    """Run a TestJob for `url` and verify it against the `result` spec.

    `extr` is the extractor class expected to be chosen for `url`.  With a
    falsy `result` only that class check is performed.  Recognised keys of
    `result`:

    - "options":   iterable of (dotted-key, value) pairs given to
                   config.set()
    - "range":     value stored under both "image-range" and
                   "chapter-range"
    - "exception": exception type tjob.run() is expected to raise
    - "url", "content": expected hex digests
    - "keyword":   either a dict checked against every collected keyword
                   dict via self._test_kwdict(), or an expected hex digest
    - "count":     an integer, or a string such as ">= 10" holding a
                   comparison operator and a number
    - "pattern":   regex all collected URLs have to match (at least one
                   URL is required)
    """
    want_content = False
    if result:
        if "options" in result:
            for option, value in result["options"]:
                config.set(option.split("."), value)
        if "range" in result:
            for range_key in ("image-range", "chapter-range"):
                config.set((range_key,), result["range"])
        want_content = "content" in result

    tjob = job.TestJob(url, content=want_content)
    self.assertEqual(extr, tjob.extractor.__class__)

    if not result:
        return

    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return

    try:
        tjob.run()
    except exception.StopExtraction:
        # a stopped extraction still leaves results that can be checked
        pass
    except exception.HttpError as err:
        # messages starting with a 5xx status code skip the test
        if re.match(r"5\d\d: ", str(err)):
            self.skipTest(err)
        raise

    # test archive-id uniqueness
    archive_ids = tjob.list_archive
    self.assertEqual(len(set(archive_ids)), len(archive_ids))

    # test extraction results
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())

    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())

    if "keyword" in result:
        expected = result["keyword"]
        if not isinstance(expected, dict):
            # assume SHA1 hash
            self.assertEqual(expected, tjob.hash_keyword.hexdigest())
        else:
            for kwdict in tjob.list_keyword:
                self._test_kwdict(kwdict, expected)

    if "count" in result:
        expected = result["count"]
        if not isinstance(expected, str):
            # assume integer
            self.assertEqual(len(tjob.list_url), expected)
        else:
            # eval() only ever sees strings accepted by this regex
            self.assertRegex(expected, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
            expr = "{} {}".format(len(tjob.list_url), expected)
            self.assertTrue(eval(expr), msg=expr)

    if "pattern" in result:
        self.assertGreater(len(tjob.list_url), 0)
        for found in tjob.list_url:
            self.assertRegex(found, result["pattern"])
def main():
    """Run a TestJob for each selected URL and print its test data.

    Command line:
      --content   forwarded to TestJob as its `content` argument
      --recreate  treat the positional arguments as extractor categories
                  and use the first element of every test of each
                  matching extractor as URL
      urls        URLs (or categories when --recreate is given)

    For every URL the extractor class name is printed, followed by either
    TESTDATA_FMT filled with the URL and the three hex digests, or
    TESTDATA_EXCEPTION_FMT filled with the URL and the name of the
    exception class raised by tjob.run().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", action="store_true")
    parser.add_argument("--recreate", action="store_true")
    parser.add_argument("urls", nargs="*")
    args = parser.parse_args()

    if not args.recreate:
        urls = args.urls
    else:
        urls = []
        for extr in extractor.extractors():
            if extr.category in args.urls:
                urls.extend(test[0] for test in extr.test)

    config.load()

    for url in urls:
        tjob = job.TestJob(url, content=args.content)
        try:
            tjob.run()
        except Exception as exc:
            # deliberately broad: the exception's class name is the output
            fmt = TESTDATA_EXCEPTION_FMT
            data = (exc.__class__.__name__,)
        else:
            fmt = TESTDATA_FMT
            hashes = (tjob.hash_url, tjob.hash_keyword, tjob.hash_content)
            data = tuple(h.hexdigest() for h in hashes)

        print(tjob.extractor.__class__.__name__)
        print(fmt.format(url, *data))
def _run_test(self, extr, url, result):
    """Run a TestJob for `url` and verify it against the `result` spec.

    `extr` is the extractor class expected to be chosen for `url`.  With a
    falsy `result` only that class check is performed.  Recognised keys of
    `result`:

    - "options":   iterable of (dotted-key, value) pairs given to
                   config.set()
    - "exception": exception type tjob.run() is expected to raise
    - "url", "content": expected hex digests
    - "keyword":   either a dict checked against every collected keyword
                   dict via self._test_kwdict(), or an expected hex digest
    - "count":     an integer, or a string such as ">= 10" holding a
                   comparison operator and a number
    - "pattern":   regex all collected URLs have to match

    An exception.HttpError whose first argument exposes a 5xx
    `response.status_code` skips the test; any other HttpError is
    re-raised unchanged.
    """
    if result:
        if "options" in result:
            for key, value in result["options"]:
                config.set(key.split("."), value)
        content = "content" in result
    else:
        content = False

    tjob = job.TestJob(url, content=content)
    self.assertEqual(extr, tjob.extractor.__class__)

    if not result:
        return

    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return

    try:
        tjob.run()
    except exception.HttpError as exc:
        # The exception may have no arguments at all, or a first argument
        # without a usable `.response`; neither case may hide the original
        # HttpError behind an IndexError/AttributeError.
        try:
            status = exc.args[0].response.status_code
        except (AttributeError, IndexError):
            status = None
        if status is not None and 500 <= status < 600:
            self.skipTest(exc)
        raise

    # test archive-id uniqueness
    self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))

    # test extraction results
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())

    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())

    if "keyword" in result:
        keyword = result["keyword"]
        if isinstance(keyword, dict):
            for kwdict in tjob.list_keyword:
                self._test_kwdict(kwdict, keyword)
        else:  # assume SHA1 hash
            self.assertEqual(keyword, tjob.hash_keyword.hexdigest())

    if "count" in result:
        count = result["count"]
        if isinstance(count, str):
            # eval() only ever sees strings accepted by this regex
            self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
            expr = "{} {}".format(len(tjob.list_url), count)
            self.assertTrue(eval(expr), msg=expr)
        else:  # assume integer
            self.assertEqual(len(tjob.list_url), count)

    if "pattern" in result:
        for item in tjob.list_url:
            self.assertRegex(item, result["pattern"])
def _run_test(self, extr, url, result):
    """Run a TestJob for `url` and compare its digests against `result`.

    `extr` is the extractor class the job is expected to select.  `result`
    has to support the `in` operator; recognised keys are:

    - "exception": exception type tjob.run() is expected to raise
    - "url", "keyword", "content": expected hex digests of the job's
                   corresponding hash objects

    The presence of "content" in `result` is also passed to TestJob as its
    second positional argument.
    """
    with_content = "content" in result
    tjob = job.TestJob(url, with_content)
    self.assertEqual(extr, tjob.extractor.__class__)

    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return

    tjob.run()

    # digest checks, in the same order as they are reported on failure
    digest_checks = (
        ("url", "hash_url"),
        ("keyword", "hash_keyword"),
        ("content", "hash_content"),
    )
    for key, attr in digest_checks:
        if key in result:
            self.assertEqual(getattr(tjob, attr).hexdigest(), result[key])
def _run_test(self, extr, url, result):
    """Run a TestJob for `url` and compare its output against `result`.

    `extr` is the extractor class the job is expected to select for `url`.
    `result` may be falsy, in which case only the extractor class is
    checked.  Otherwise the following keys are honoured when present:

    - "options":   iterable of (dotted-key, value) pairs passed to
                   config.set() before the job is created
    - "exception": exception type that tjob.run() is expected to raise
    - "url", "keyword", "content": expected hex digests of the job's
                   corresponding hash objects
    - "count":     expected number of entries in tjob.urllist
    - "pattern":   regex every entry of tjob.urllist has to match

    An exception.HttpError whose first argument exposes a 5xx
    `response.status_code` skips the test; any other HttpError is
    re-raised unchanged.
    """
    if result:
        if "options" in result:
            for key, value in result["options"]:
                config.set(key.split("."), value)
        content = "content" in result
    else:
        content = False

    tjob = job.TestJob(url, content=content)
    self.assertEqual(extr, tjob.extractor.__class__)

    if not result:
        return

    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return

    try:
        tjob.run()
    except exception.HttpError as exc:
        # The exception may have no arguments at all, or a first argument
        # without a usable `.response`; neither case may hide the original
        # HttpError behind an IndexError/AttributeError.
        try:
            status = exc.args[0].response.status_code
        except (AttributeError, IndexError):
            status = None
        if status is not None and 500 <= status < 600:
            self.skipTest(exc)
        raise

    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())

    if "keyword" in result:
        self.assertEqual(result["keyword"], tjob.hash_keyword.hexdigest())

    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())

    if "count" in result:
        self.assertEqual(len(tjob.urllist), int(result["count"]))

    if "pattern" in result:
        for item in tjob.urllist:
            self.assertRegex(item, result["pattern"])