def _run_test(self, extr, url, result):
    """Run one extractor test case and check it against the expected `result`.

    `result` may be falsy (only the extractor match is verified), contain
    an "exception" entry (the run must raise it), or contain hash digests
    and counts to compare the extraction output against.
    """
    if result:
        if "options" in result:
            # apply per-test config overrides ("a.b.c" -> nested path)
            for key, value in result["options"]:
                config.set(key.split("."), value)
        content = "content" in result
    else:
        content = False
    tjob = job.TestJob(url, content=content)
    # the URL must be claimed by the expected extractor class
    self.assertEqual(extr, tjob.extractor.__class__)
    if not result:
        return
    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return
    tjob.run()
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())
    if "keyword" in result:
        self.assertEqual(result["keyword"], tjob.hash_keyword.hexdigest())
    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())
    if "count" in result:
        self.assertEqual(len(tjob.urllist), int(result["count"]))
    if "pattern" in result:
        for url in tjob.urllist:
            self.assertRegex(url, result["pattern"])
def main():
    """Command-line entry point: run a TestJob per URL and print its digests."""
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--content", action="store_true")
    argparser.add_argument("--recreate", action="store_true")
    argparser.add_argument("urls", nargs="*")
    args = argparser.parse_args()

    if not args.recreate:
        urls = args.urls
    else:
        # treat positional arguments as category names and collect
        # every test URL belonging to those categories
        urls = [
            test[0]
            for extr in extractor.extractors()
            if extr.category in args.urls
            for test in extr.test
        ]

    config.load()
    config.set(("downloader", "part"), False)

    for url in urls:
        test_job = job.TestJob(url, content=args.content)
        try:
            test_job.run()
        except Exception as exc:
            fmt, data = TESTDATA_EXCEPTION_FMT, (exc.__class__.__name__, )
        else:
            fmt, data = TESTDATA_FMT, (
                test_job.hash_url.hexdigest(),
                test_job.hash_keyword.hexdigest(),
                test_job.hash_content.hexdigest(),
            )
        print(test_job.extractor.__class__.__name__)
        print(fmt.format(url, *data))
def _run_test(self, extr, url, result):
    """Run one extractor test case and check it against the expected `result`."""
    if result:
        if "options" in result:
            # apply per-test config overrides ("a.b.c" -> nested path)
            for key, value in result["options"]:
                config.set(key.split("."), value)
        if "range" in result:
            config.set(("_", "image", "range"), (result["range"],))
        content = "content" in result
    else:
        content = False
    tjob = job.TestJob(url, content=content)
    # the URL must be claimed by the expected extractor class
    self.assertEqual(extr, tjob.extractor.__class__)
    if not result:
        return
    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return
    try:
        tjob.run()
    except exception.StopExtraction:
        pass
    except exception.HttpError as exc:
        # skip instead of fail on 5xx server errors
        if re.match(r"5\d\d HTTP Error:", str(exc)):
            self.skipTest(exc)
        raise
    # test archive-id uniqueness
    self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
    # test extraction results
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())
    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())
    if "keyword" in result:
        keyword = result["keyword"]
        if isinstance(keyword, dict):
            for kwdict in tjob.list_keyword:
                self._test_kwdict(kwdict, keyword)
        else:  # assume SHA1 hash
            self.assertEqual(keyword, tjob.hash_keyword.hexdigest())
    if "count" in result:
        count = result["count"]
        if isinstance(count, str):
            # string counts are comparison expressions, e.g. ">= 3"
            self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
            expr = "{} {}".format(len(tjob.list_url), count)
            # eval is safe here: `expr` matched the restricted pattern above
            self.assertTrue(eval(expr), msg=expr)
        else:  # assume integer
            self.assertEqual(len(tjob.list_url), count)
    if "pattern" in result:
        self.assertGreater(len(tjob.list_url), 0)
        for url in tjob.list_url:
            self.assertRegex(url, result["pattern"])
def test_interpolate(self):
    """Interpolation resolves values along a path, falling back to defaults."""
    for path, expected in ((["a"], "1"), (["b", "a"], "1")):
        self.assertEqual(config.interpolate(path), expected)
    self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
    self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
    # setting a root-level value makes it visible along every path
    config.set(["d"], 123)
    self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
    self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
def setUp(self):
    """Provide default test credentials via the global configuration."""
    name = "gallerydl"
    email = "*****@*****.**"
    for path, value in (
        (("cache", "file"), ":memory:"),
        (("extractor", "username"), name),
        (("extractor", "password"), name),
        (("extractor", "nijie", "username"), email),
        (("extractor", "seiga", "username"), email),
    ):
        config.set(path, value)
def _run_test(self, extr, url, result):
    """Run one extractor test case and check it against the expected `result`."""
    if result:
        if "options" in result:
            # apply per-test config overrides ("a.b.c" -> nested path)
            for key, value in result["options"]:
                config.set(key.split("."), value)
        content = "content" in result
    else:
        content = False
    tjob = job.TestJob(url, content=content)
    # the URL must be claimed by the expected extractor class
    self.assertEqual(extr, tjob.extractor.__class__)
    if not result:
        return
    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return
    try:
        tjob.run()
    except exception.HttpError as exc:
        try:
            # skip instead of fail on 5xx server errors
            if 500 <= exc.args[0].response.status_code < 600:
                self.skipTest(exc)
        except AttributeError:
            # no response object attached; fall through and re-raise
            pass
        raise
    # test archive-id uniqueness
    self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
    # test extraction results
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())
    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())
    if "keyword" in result:
        keyword = result["keyword"]
        if isinstance(keyword, dict):
            for kwdict in tjob.list_keyword:
                self._test_kwdict(kwdict, keyword)
        else:  # assume SHA1 hash
            self.assertEqual(keyword, tjob.hash_keyword.hexdigest())
    if "count" in result:
        count = result["count"]
        if isinstance(count, str):
            # string counts are comparison expressions, e.g. ">= 3"
            self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
            expr = "{} {}".format(len(tjob.list_url), count)
            # eval is safe here: `expr` matched the restricted pattern above
            self.assertTrue(eval(expr), msg=expr)
        else:  # assume integer
            self.assertEqual(len(tjob.list_url), count)
    if "pattern" in result:
        for url in tjob.list_url:
            self.assertRegex(url, result["pattern"])
def _test_warning(self, filename, exc):
    """Loading cookies from `filename` must warn exactly once with type `exc`."""
    config.set(CKEY, filename)
    logger = logging.getLogger("test")
    with mock.patch.object(logger, "warning") as warning:
        jar = extractor.find("test:").session.cookies
        # nothing loaded, one "cookies: %s" warning carrying the exception
        self.assertEqual(len(jar), 0)
        self.assertEqual(warning.call_count, 1)
        self.assertEqual(warning.call_args[0][0], "cookies: %s")
        self.assertIsInstance(warning.call_args[0][1], exc)
def _test_warning(self, filename, exc):
    """Loading cookies from `filename` must warn exactly once with type `exc`."""
    config.set((), "cookies", filename)
    logger = logging.getLogger("test")
    with mock.patch.object(logger, "warning") as warning:
        jar = extractor.find("test:").session.cookies
        # nothing loaded, one "cookies: %s" warning carrying the exception
        self.assertEqual(len(jar), 0)
        self.assertEqual(warning.call_count, 1)
        self.assertEqual(warning.call_args[0][0], "cookies: %s")
        self.assertIsInstance(warning.call_args[0][1], exc)
def test_cookiefile(self):
    """A cookies.txt file is loaded into the extractor's session."""
    config.set(CKEY, self.cookiefile)
    jar = extractor.find("test:").session.cookies
    self.assertEqual(len(jar), 1)
    cookie = next(iter(jar))
    for attr, expected in (("domain", ".example.org"), ("path", "/"),
                           ("name", "NAME"), ("value", "VALUE")):
        self.assertEqual(getattr(cookie, attr), expected)
def test_cookiefile(self):
    """A cookies.txt file is loaded into the extractor's session."""
    config.set((), "cookies", self.cookiefile)
    jar = extractor.find("test:").session.cookies
    self.assertEqual(len(jar), 1)
    cookie = next(iter(jar))
    for attr, expected in (("domain", ".example.org"), ("path", "/"),
                           ("name", "NAME"), ("value", "VALUE")):
        self.assertEqual(getattr(cookie, attr), expected)
def test_private(self):
    """With 'output.private' enabled, '_'-prefixed keys like '_fallback' survive."""
    config.set(("output", ), "private", True)
    extr = TestExtractor.from_url("test:")
    tjob = self.jobclass(extr, file=io.StringIO())
    tjob.run()
    for num in range(1, 4):
        fallback = tjob.data[num][2]["_fallback"]
        self.assertEqual(
            fallback,
            ("https://example.org/alt/{}.jpg".format(num), ),
        )
def test_sleep(self):
    """'sleep-extractor' delays the job by the configured value; 0 disables it."""
    extr = TestExtractor.from_url("test:")
    tjob = self.jobclass(extr, file=io.StringIO())

    config.set((), "sleep-extractor", 123)
    with patch("time.sleep") as sleep:
        tjob.run()
    sleep.assert_called_once_with(123)

    config.set((), "sleep-extractor", 0)
    with patch("time.sleep") as sleep:
        tjob.run()
    sleep.assert_not_called()
def test_cookie_login(self):
    """_login_impl must not run when all required login cookies are set."""
    extr_cookies = {
        "exhentai": ("ipb_member_id", "ipb_pass_hash"),
        "nijie": ("nemail", "nlogin"),
        "sankaku": ("login", "pass_hash"),
        "seiga": ("user_session",),
    }
    for category, cookienames in extr_cookies.items():
        config.set(CKEY, dict.fromkeys(cookienames, "value"))
        extr = _get_extractor(category)
        with mock.patch.object(extr, "_login_impl") as login_impl:
            extr.login()
            login_impl.assert_not_called()
def test_cookie_login(self):
    """_login_impl must not run when all required login cookies are set."""
    extr_cookies = {
        "exhentai": ("ipb_member_id", "ipb_pass_hash"),
        "idolcomplex": ("login", "pass_hash"),
        "nijie": ("nemail", "nlogin"),
        "seiga": ("user_session", ),
    }
    for category, cookienames in extr_cookies.items():
        config.set((), "cookies", dict.fromkeys(cookienames, "value"))
        extr = _get_extractor(category)
        with mock.patch.object(extr, "_login_impl") as login_impl:
            extr.login()
            login_impl.assert_not_called()
def test_num_string(self):
    """'output.num-to-str' converts numeric metadata values to strings."""
    extr = TestExtractor.from_url("test:")
    tjob = self.jobclass(extr, file=io.StringIO())

    # disabled by default: no conversions happen
    with patch("gallery_dl.util.number_to_string") as mock_nts:
        tjob.run()
    self.assertEqual(len(mock_nts.call_args_list), 0)

    # enabled: one conversion per numeric metadata value
    config.set(("output", ), "num-to-str", True)
    with patch("gallery_dl.util.number_to_string") as mock_nts:
        tjob.run()
    self.assertEqual(len(mock_nts.call_args_list), 52)

    tjob.run()
    self.assertEqual(tjob.data[-1][0], Message.Url)
    self.assertEqual(tjob.data[-1][2]["num"], "3")
def __init__(self, subreddit, path, sort_type, limit, previous_id=None,
             debug=False, disable_db=False, disable_im=False):
    """Set up logging, image-matching, database access and media filters.

    `disable_db`/`disable_im` skip the database and elasticsearch
    image-match setup respectively.
    """
    # call constructor of GetSubredditSubmissions class passing args
    super().__init__(subreddit, path, sort_type, limit, previous_id, debug)
    self.log = logging.getLogger('DownloadSubredditSubmissions')
    # exception types treated as recoverable during downloads
    # NOTE(review): presumably callers catch self.Exceptions per-submission
    # — confirm against the download loop
    self.Exceptions = (FileExistsException, FileExistsError,
                       ImgurException, HTTPError, ValueError, SSLError,
                       NoExtractorError, TurboPalmTreeException)
    self.disable_im = disable_im
    if not self.disable_im:
        # elastic search variables
        self.es_index, self.es_doc_type = 'tpt_images', 'image'
        # object used to add, search and compare images in elasticsearch
        # for duplicate deletion
        self.im = ImageMatchManager(index=self.es_index,
                                    doc_type=self.es_doc_type,
                                    distance_cutoff=0.40)
    self.disable_db = disable_db
    if not self.disable_db:
        # get db manager object for inserting and saving data to db
        try:
            self.db = TPTDatabaseManager()
        except sqlite3.OperationalError as e:
            # db unavailable: log and continue with the db disabled
            self.log.error("{}: {}".format(e.__class__.__name__, str(e)))
            self.disable_db = True
    # used to check if url ends with any of these
    self.image_extensions = ('.png', '.jpg', '.jpeg', '.gif')
    video_extensions = ('.webm', '.mp4')
    self.media_extensions = tuple(
        chain(self.image_extensions, video_extensions))
    # prevent gallery-dl module from printing to std output
    gallery_dl_config.set(("output", ), "mode", "null")
def test_extractor_filter(self):
    """'blacklist'/'whitelist' options control which child extractors pass."""
    tjob = self.jobclass(TestExtractor.from_url("test:"))
    classes = (TestExtractor, TestExtractorParent, TestExtractorAlt)

    # default filter, then cumulative blacklist/whitelist settings
    for option, value, expected in (
        (None, None, (False, False, True)),
        ("blacklist", ":test_subcategory", (False, True, False)),
        ("whitelist", "test_category:test_subcategory",
         (True, False, False)),
    ):
        if option is not None:
            config.set((), option, value)
        accept = tjob._build_extractor_filter()
        for cls, verdict in zip(classes, expected):
            self.assertEqual(accept(cls), verdict)
def test_set(self):
    """config.set() stores values at root, one, and two levels deep."""
    config.set((), "c", [1, 2, 3])
    config.set(("b", ), "c", [1, 2, 3])
    config.set(("e", "f"), "g", value=234)
    for path, key, expected in (
        ((), "c", [1, 2, 3]),
        (("b", ), "c", [1, 2, 3]),
        (("e", "f"), "g", 234),
    ):
        self.assertEqual(config.get(path, key), expected)
def _run_test(self, extr, url, result):
    """Run one extractor test case and check it against the expected `result`.

    `result` may be falsy (only the extractor match is verified), contain
    an "exception" entry (the run must raise it), or contain hash digests
    and counts to compare the extraction output against.
    """
    if result:
        if "options" in result:
            # apply per-test config overrides ("a.b.c" -> nested path)
            for key, value in result["options"]:
                config.set(key.split("."), value)
        content = "content" in result
    else:
        content = False
    tjob = job.TestJob(url, content=content)
    # the URL must be claimed by the expected extractor class
    self.assertEqual(extr, tjob.extractor.__class__)
    if not result:
        return
    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return
    try:
        tjob.run()
    except exception.HttpError as exc:
        try:
            # skip instead of fail on 5xx server errors
            if 500 <= exc.args[0].response.status_code < 600:
                self.skipTest(exc)
        except AttributeError:
            # FIX: drop the unused 'as e' binding; no response object
            # attached — fall through and re-raise the original error
            pass
        raise
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())
    if "keyword" in result:
        self.assertEqual(result["keyword"], tjob.hash_keyword.hexdigest())
    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())
    if "count" in result:
        self.assertEqual(len(tjob.urllist), int(result["count"]))
    if "pattern" in result:
        for url in tjob.urllist:
            self.assertRegex(url, result["pattern"])
def test_interpolate(self):
    """interpolate() resolves along the path; root-level values win."""
    for path, key, expected in (
        ((), "a", 1),
        (("b", ), "a", 1),
        (("b", "b"), "a", 1),
        ((), "c", None),
        (("b", ), "c", "text"),
        (("b", "b"), "c", [8, 9]),
        (("a", ), "g", None),
        (("a", "a"), "g", None),
        (("e", "f"), "g", None),
    ):
        self.assertEqual(config.interpolate(path, key), expected)

    self.assertEqual(config.interpolate(("e", "f"), "g", 4), 4)
    self.assertEqual(config.interpolate(("b", ), "d", 1), 1)
    self.assertEqual(config.interpolate(("d", ), "d", 1), 1)

    config.set((), "d", 2)
    self.assertEqual(config.interpolate(("b", ), "d", 1), 2)
    self.assertEqual(config.interpolate(("d", ), "d", 1), 2)

    # a deeper value does not override the root-level one
    config.set(("b", ), "d", 3)
    self.assertEqual(config.interpolate(("b", ), "d", 1), 2)
    self.assertEqual(config.interpolate(("d", ), "d", 1), 2)
def test_accumulate(self):
    """accumulate() concatenates list values from `path` up to the root."""
    self.assertEqual(config.accumulate((), "l"), [])

    config.set((), "l", [5, 6])
    config.set(("c", ), "l", [3, 4])
    config.set(("c", "c"), "l", [1, 2])
    self.assertEqual(config.accumulate((), "l"), [5, 6])
    self.assertEqual(config.accumulate(("c", ), "l"), [3, 4, 5, 6])
    self.assertEqual(config.accumulate(("c", "c"), "l"), [1, 2, 3, 4, 5, 6])

    # None values and unset keys are skipped during accumulation
    config.set(("c", ), "l", None)
    config.unset(("c", "c"), "l")
    for path in ((), ("c", ), ("c", "c")):
        self.assertEqual(config.accumulate(path, "l"), [5, 6])
def test_custom(self):
    """Custom filename/directory/interval values are shown next to defaults."""
    config.set((), "filename", "custom")
    config.set((), "directory", ("custom",))
    config.set((), "sleep-request", 321)
    extr = TestExtractor.from_url("test:")
    extr.request_interval = 123.456
    # NOTE(review): the line layout of this expected-output literal was
    # reconstructed from a whitespace-mangled source — verify against a
    # real run of _capture_stdout before relying on it
    self.assertEqual(self._capture_stdout(extr), """\
Category / Subcategory
"test_category" / "test_subcategory"

Filename format (custom): "custom"
Filename format (default): "test_(unknown).{extension}"
Directory format (custom): ["custom"]
Directory format (default): ["{category}"]
Request interval (custom): 321
Request interval (default): 123.456

""")
def test_set(self):
    """config.set() stores values under nested list paths."""
    config.set(["b", "c"], [1, 2, 3])
    config.set(["e", "f", "g"], value=234)
    for path, expected in ((["b", "c"], [1, 2, 3]),
                           (["e", "f", "g"], 234)):
        self.assertEqual(config.get(path), expected)
def setUp(self):
    """Install a two-entry cookie dict via the 'cookies' option."""
    self.cdict = dict(NAME1="VALUE1", NAME2="VALUE2")
    config.set((), "cookies", self.cdict)
def setUp(self):
    """Load the default configuration and keep the cache in memory."""
    config.load()
    config.set(("cache", "file"), ":memory:")
def setUp(self):
    """Install a two-entry cookie dict under the cookie config key."""
    self.cdict = dict(NAME1="VALUE1", NAME2="VALUE2")
    config.set(CKEY, self.cdict)
def setup_test_config():
    """Reset the global configuration to known test values (credentials, keys)."""
    name = "gallerydl"
    email = "*****@*****.**"
    config.clear()
    for path, value in (
        (("cache", "file"), ":memory:"),
        (("downloader", "part"), False),
        (("extractor", "timeout"), 60),
        (("extractor", "username"), name),
        (("extractor", "password"), name),
        (("extractor", "nijie", "username"), email),
        (("extractor", "seiga", "username"), email),
        (("extractor", "danbooru", "username"), None),
        (("extractor", "twitter", "username"), None),
        (("extractor", "mangoxo", "password"), "VZ8DL3983u"),
        (("extractor", "deviantart", "client-id"), "7777"),
        (("extractor", "deviantart", "client-secret"),
         "ff14994c744d9208e5caeec7aab4a026"),
        (("extractor", "tumblr", "api-key"),
         "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6"),
        (("extractor", "tumblr", "api-secret"),
         "6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj"),
        (("extractor", "tumblr", "access-token"),
         "N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG"),
        (("extractor", "tumblr", "access-token-secret"),
         "sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O"),
    ):
        config.set(path, value)
def setup_test_config():
    """Reset the global configuration to known test values (credentials, keys)."""
    name = "gallerydl"
    email = "*****@*****.**"
    config.clear()
    for path, value in (
        (("cache", "file"), ":memory:"),
        (("downloader", "part"), False),
        (("downloader", "adjust-extensions"), False),
        (("extractor", "timeout"), 60),
        (("extractor", "username"), name),
        (("extractor", "password"), name),
        (("extractor", "nijie", "username"), email),
        (("extractor", "seiga", "username"), email),
        (("extractor", "danbooru", "username"), None),
        (("extractor", "instagram", "username"), None),
        (("extractor", "imgur", "username"), None),
        (("extractor", "twitter", "username"), None),
        (("extractor", "mangoxo", "username"), "LiQiang3"),
        (("extractor", "mangoxo", "password"), "5zbQF10_5u25259Ma"),
        (("extractor", "deviantart", "client-id"), "7777"),
        (("extractor", "deviantart", "client-secret"),
         "ff14994c744d9208e5caeec7aab4a026"),
        (("extractor", "tumblr", "api-key"),
         "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6"),
        (("extractor", "tumblr", "api-secret"),
         "6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj"),
        (("extractor", "tumblr", "access-token"),
         "N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG"),
        (("extractor", "tumblr", "access-token-secret"),
         "sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O"),
    ):
        config.set(path, value)
def setUpClass(cls):
    """Create a shared test extractor and a temporary base directory."""
    cls.fnum = 0
    cls.extractor = extractor.find("test:")
    cls.extractor.log.job = None
    cls.dir = tempfile.TemporaryDirectory()
    config.set((), "base-directory", cls.dir.name)
def setUpClass(cls):
    """Create a temporary base directory and a shared fake job."""
    cls.fnum = 0
    cls.dir = tempfile.TemporaryDirectory()
    config.set((), "base-directory", cls.dir.name)
    cls.job = FakeJob()
def setUpClass(cls):
    """Create a shared test extractor and a temporary base directory."""
    cls.extractor = extractor.find("test:")
    cls.dir = tempfile.TemporaryDirectory()
    config.set(("base-directory", ), cls.dir.name)
def _run_test(self, extr, url, result):
    """Run one extractor test case and check it against the expected `result`."""
    if result:
        if "options" in result:
            # apply per-test config overrides ("a.b.c" -> nested path)
            for key, value in result["options"]:
                config.set(key.split("."), value)
        if "range" in result:
            config.set(("image-range",), result["range"])
            config.set(("chapter-range",), result["range"])
        content = "content" in result
    else:
        content = False
    tjob = ResultJob(url, content=content)
    # the URL must be claimed by the expected extractor class
    self.assertEqual(extr, tjob.extractor.__class__)
    if not result:
        return
    if "exception" in result:
        self.assertRaises(result["exception"], tjob.run)
        return
    try:
        tjob.run()
    except exception.StopExtraction:
        pass
    except exception.HttpError as exc:
        # skip instead of fail on 5xx server errors
        if re.match(r"5\d\d: ", str(exc)):
            self.skipTest(exc)
        raise
    # test archive-id uniqueness
    self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
    # test '_extractor' entries
    if tjob.queue:
        for url, kwdict in zip(tjob.list_url, tjob.list_keyword):
            if "_extractor" in kwdict:
                extr = kwdict["_extractor"].from_url(url)
                self.assertIsInstance(extr, kwdict["_extractor"])
                self.assertEqual(extr.url, url)
    # test extraction results
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())
    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())
    if "keyword" in result:
        keyword = result["keyword"]
        if isinstance(keyword, dict):
            for kwdict in tjob.list_keyword:
                self._test_kwdict(kwdict, keyword)
        else:  # assume SHA1 hash
            self.assertEqual(keyword, tjob.hash_keyword.hexdigest())
    if "count" in result:
        count = result["count"]
        if isinstance(count, str):
            # string counts are comparison expressions, e.g. ">= 3"
            self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
            expr = "{} {}".format(len(tjob.list_url), count)
            # eval is safe here: `expr` matched the restricted pattern above
            self.assertTrue(eval(expr), msg=expr)
        else:  # assume integer
            self.assertEqual(len(tjob.list_url), count)
    if "pattern" in result:
        self.assertGreater(len(tjob.list_url), 0)
        for url in tjob.list_url:
            self.assertRegex(url, result["pattern"])
def setup_test_config():
    """Reset the global configuration to known test values (credentials, keys)."""
    name = "gallerydl"
    email = "*****@*****.**"
    email2 = "*****@*****.**"
    config.clear()

    for path, key, value in (
        (("cache", ), "file", None),
        (("downloader", ), "part", False),
        (("downloader", ), "adjust-extensions", False),
        (("extractor", ), "timeout", 60),
        (("extractor", ), "username", name),
        (("extractor", ), "password", name),
        (("extractor", "nijie"), "username", email),
        (("extractor", "seiga"), "username", email),
        (("extractor", "pinterest"), "username", email2),
        (("extractor", "pinterest"), "username", None),  # login broken
        (("extractor", "newgrounds"), "username", "d1618111"),
        (("extractor", "newgrounds"), "password", "d1618111"),
        (("extractor", "mangoxo"), "username", "LiQiang3"),
        (("extractor", "mangoxo"), "password", "5zbQF10_5u25259Ma"),
    ):
        config.set(path, key, value)

    for category in ("danbooru", "instagram", "twitter",
                     "subscribestar", "e621", "inkbunny"):
        config.set(("extractor", category), "username", None)

    config.set(("extractor", "mastodon.social"), "access-token",
               "Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ")
    config.set(("extractor", "deviantart"), "client-id", "7777")
    config.set(("extractor", "deviantart"), "client-secret",
               "ff14994c744d9208e5caeec7aab4a026")
    config.set(("extractor", "tumblr"), "api-key",
               "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6")
    config.set(("extractor", "tumblr"), "api-secret",
               "6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj")
    config.set(("extractor", "tumblr"), "access-token",
               "N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG")
    config.set(("extractor", "tumblr"), "access-token-secret",
               "sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O")
def setUp(self):
    """Populate the configuration with nested test values."""
    for path, key, value in (
        ((), "a", 1),
        (("b", ), "a", 2),
        (("b", "b"), "a", 3),
        (("b", ), "c", "text"),
        (("b", "b"), "c", [8, 9]),
    ):
        config.set(path, key, value)
# setup target directory
target = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(target, exist_ok=True)

for index, extr, url, result in tests:
    # filename
    filename = "{}-{}-{}.json".format(extr.category, extr.subcategory, index)
    print(filename)

    # config values
    setup_test_config()
    if "options" in result:
        for key, value in result["options"]:
            config.set(key.split("."), value)
    if "range" in result:
        config.set(("image-range",), result["range"])
        config.set(("chapter-range",), result["range"])

    # write test data
    try:
        with open(os.path.join(target, filename), "w") as outfile:
            job.DataJob(url, file=outfile, ensure_ascii=False).run()
    except KeyboardInterrupt:
        sys.exit()
def _run_test(self, extr, url, result):
    """Run one extractor test case and check it against the expected `result`."""
    if result:
        if "options" in result:
            # apply per-test config overrides ("a.b.c" -> nested path)
            for key, value in result["options"]:
                config.set(key.split("."), value)
        if "range" in result:
            config.set(("image-range", ), result["range"])
            config.set(("chapter-range", ), result["range"])
        content = "content" in result
    else:
        content = False
    tjob = ResultJob(url, content=content)
    # the URL must be claimed by the expected extractor class
    self.assertEqual(extr, tjob.extractor.__class__)
    if not result:
        return
    if "exception" in result:
        with self.assertRaises(result["exception"]):
            tjob.run()
        return
    try:
        tjob.run()
    except exception.StopExtraction:
        pass
    except exception.HttpError as exc:
        exc = str(exc)
        # record and skip on 5xx server errors and read timeouts
        if re.match(r"5\d\d: ", exc) or \
                re.search(r"\bRead timed out\b", exc):
            self._skipped.append((url, exc))
            self.skipTest(exc)
        raise
    # test archive-id uniqueness
    self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
    # test '_extractor' entries
    if tjob.queue:
        for url, kwdict in zip(tjob.list_url, tjob.list_keyword):
            if "_extractor" in kwdict:
                extr = kwdict["_extractor"].from_url(url)
                self.assertIsInstance(extr, kwdict["_extractor"])
                self.assertEqual(extr.url, url)
    # test extraction results
    if "url" in result:
        self.assertEqual(result["url"], tjob.hash_url.hexdigest())
    if "content" in result:
        self.assertEqual(result["content"], tjob.hash_content.hexdigest())
    if "keyword" in result:
        keyword = result["keyword"]
        if isinstance(keyword, dict):
            for kwdict in tjob.list_keyword:
                self._test_kwdict(kwdict, keyword)
        else:  # assume SHA1 hash
            self.assertEqual(keyword, tjob.hash_keyword.hexdigest())
    if "count" in result:
        count = result["count"]
        if isinstance(count, str):
            # string counts are comparison expressions, e.g. ">= 3"
            self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
            expr = "{} {}".format(len(tjob.list_url), count)
            # eval is safe here: `expr` matched the restricted pattern above
            self.assertTrue(eval(expr), msg=expr)
        else:  # assume integer
            self.assertEqual(len(tjob.list_url), count)
    if "pattern" in result:
        self.assertGreater(len(tjob.list_url), 0)
        for url in tjob.list_url:
            self.assertRegex(url, result["pattern"])
def setUp(self):
    """Provide default test credentials and API keys via the configuration."""
    name = "gallerydl"
    email = "*****@*****.**"
    for path, value in (
        (("cache", "file"), ":memory:"),
        (("downloader", "part"), False),
        (("extractor", "username"), name),
        (("extractor", "password"), name),
        (("extractor", "nijie", "username"), email),
        (("extractor", "seiga", "username"), email),
        (("extractor", "deviantart", "client-id"), "7777"),
        (("extractor", "deviantart", "client-secret"),
         "ff14994c744d9208e5caeec7aab4a026"),
        (("extractor", "tumblr", "api-key"),
         "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6"),
    ):
        config.set(path, value)
def _run_test(self, extr, url, result):
    """Run one extractor test case and check it against the expected `result`.

    `result` may be falsy (only the extractor match is verified), contain
    an "exception" entry (the run must raise it), or contain hash digests,
    counts, and patterns to compare the extraction output against.
    """
    if result:
        if "options" in result:
            # apply per-test config overrides ("a.b.c" -> path + key)
            for key, value in result["options"]:
                key = key.split(".")
                config.set(key[:-1], key[-1], value)
        if "range" in result:
            config.set((), "image-range", result["range"])
            config.set((), "chapter-range", result["range"])
        content = "content" in result
    else:
        content = False
    tjob = ResultJob(url, content=content)
    # the URL must be claimed by the expected extractor class
    self.assertEqual(extr, tjob.extractor.__class__)
    if not result:
        return
    if "exception" in result:
        with self.assertRaises(result["exception"]):
            tjob.run()
        return
    try:
        tjob.run()
    except exception.StopExtraction:
        pass
    except exception.HttpError as exc:
        exc = str(exc)
        # record and skip on 5xx server errors and read timeouts
        if re.match(r"'5\d\d ", exc) or \
                re.search(r"\bRead timed out\b", exc):
            self._skipped.append((url, exc))
            self.skipTest(exc)
        raise
    if result.get("archive", True):
        self.assertEqual(
            len(set(tjob.archive_list)),
            len(tjob.archive_list),
            "archive-id uniqueness",
        )
    if tjob.queue:
        # test '_extractor' entries
        for url, kwdict in zip(tjob.url_list, tjob.kwdict_list):
            if "_extractor" in kwdict:
                extr = kwdict["_extractor"].from_url(url)
                self.assertIsInstance(extr, kwdict["_extractor"])
                self.assertEqual(extr.url, url)
    else:
        # test 'extension' entries
        for kwdict in tjob.kwdict_list:
            self.assertIn("extension", kwdict)
    # test extraction results
    if "url" in result:
        self.assertEqual(result["url"], tjob.url_hash.hexdigest())
    if "content" in result:
        expected = result["content"]
        digest = tjob.content_hash.hexdigest()
        if isinstance(expected, str):
            self.assertEqual(digest, expected, "content")
        else:  # assume iterable
            self.assertIn(digest, expected, "content")
    if "keyword" in result:
        expected = result["keyword"]
        if isinstance(expected, dict):
            for kwdict in tjob.kwdict_list:
                self._test_kwdict(kwdict, expected)
        else:  # assume SHA1 hash
            self.assertEqual(expected, tjob.kwdict_hash.hexdigest())
    if "count" in result:
        count = result["count"]
        if isinstance(count, str):
            # string counts are comparison expressions, e.g. ">= 3"
            self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
            # FIX: rejoin the "{} {}" format string that was broken
            # across two lines (a literal newline inside the string
            # made this a syntax error)
            expr = "{} {}".format(len(tjob.url_list), count)
            # eval is safe here: `expr` matched the restricted pattern above
            self.assertTrue(eval(expr), msg=expr)
        else:  # assume integer
            self.assertEqual(len(tjob.url_list), count)
    if "pattern" in result:
        self.assertGreater(len(tjob.url_list), 0)
        for url in tjob.url_list:
            self.assertRegex(url, result["pattern"])
def setUpClass(cls):
    """Create a shared test extractor and a temporary base directory."""
    cls.fnum = 0
    cls.extractor = extractor.find("test:")
    cls.dir = tempfile.TemporaryDirectory()
    config.set(("base-directory",), cls.dir.name)