예제 #1
0
    def _run_test(self, extr, url, result):
        """Run a TestJob for 'url' and compare its output against 'result'.

        'extr' is the extractor class expected to be selected for 'url'.
        'result' is either falsy (only the extractor class is checked) or
        a mapping that may contain the keys "options", "content",
        "exception", "url", "keyword", "count" and "pattern".
        """
        if result and "options" in result:
            # options are given as ("dotted.config.path", value) pairs
            for option, value in result["options"]:
                config.set(option.split("."), value)
        content = bool(result) and "content" in result

        tjob = job.TestJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            self.assertRaises(result["exception"], tjob.run)
            return

        tjob.run()

        # compare the hex digests collected by the job with the expected ones
        for key in ("url", "keyword", "content"):
            if key in result:
                digest = getattr(tjob, "hash_" + key).hexdigest()
                self.assertEqual(result[key], digest)

        if "count" in result:
            self.assertEqual(len(tjob.urllist), int(result["count"]))
        if "pattern" in result:
            for file_url in tjob.urllist:
                self.assertRegex(file_url, result["pattern"])
예제 #2
0
def main():
    """Run a TestJob for each requested URL and print its test data.

    Command-line arguments:
        --content   passed on to TestJob as 'content'
        --recreate  treat the positional arguments as extractor categories
                    and use the first element of each of their tests as URL
        urls        URLs (or categories when --recreate is given)

    For every URL the extractor class name is printed, followed by either
    the three hash digests or the name of the exception raised by the job.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", action="store_true")
    parser.add_argument("--recreate", action="store_true")
    parser.add_argument("urls", nargs="*")
    args = parser.parse_args()

    if not args.recreate:
        urls = args.urls
    else:
        urls = []
        for extr in extractor.extractors():
            if extr.category in args.urls:
                urls.extend(test[0] for test in extr.test)

    config.load()
    config.set(("downloader", "part"), False)
    for url in urls:
        tjob = job.TestJob(url, content=args.content)
        try:
            tjob.run()
        except Exception as exc:
            # any failure is reported by its exception class name
            fmt = TESTDATA_EXCEPTION_FMT
            data = (exc.__class__.__name__, )
        else:
            fmt = TESTDATA_FMT
            hashes = (tjob.hash_url, tjob.hash_keyword, tjob.hash_content)
            data = tuple(hashobj.hexdigest() for hashobj in hashes)
        print(tjob.extractor.__class__.__name__)
        print(fmt.format(url, *data))
예제 #3
0
    def _run_test(self, extr, url, result):
        """Run a TestJob for 'url' and verify it against 'result'.

        'extr' is the extractor class that must be selected for 'url'.
        'result' is either falsy (only the extractor class is verified) or
        a mapping that may contain "options", "range", "content",
        "exception", "url", "keyword", "count" and "pattern".
        """
        has_result = bool(result)
        if has_result:
            if "options" in result:
                # options are given as ("dotted.config.path", value) pairs
                for option, value in result["options"]:
                    config.set(option.split("."), value)
            if "range" in result:
                config.set(("_", "image", "range"), (result["range"],))

        tjob = job.TestJob(url, content=has_result and "content" in result)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not has_result:
            return
        if "exception" in result:
            self.assertRaises(result["exception"], tjob.run)
            return

        try:
            tjob.run()
        except exception.StopExtraction:
            pass
        except exception.HttpError as exc:
            # skip instead of failing when the message reports a 5xx error
            if re.match(r"5\d\d HTTP Error:", str(exc)):
                self.skipTest(exc)
            raise

        # archive IDs must be unique
        self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))

        # extraction results
        if "url" in result:
            self.assertEqual(result["url"], tjob.hash_url.hexdigest())

        if "content" in result:
            self.assertEqual(result["content"], tjob.hash_content.hexdigest())

        if "keyword" in result:
            expected = result["keyword"]
            if not isinstance(expected, dict):
                # anything but a dict is assumed to be a SHA1 hash
                self.assertEqual(expected, tjob.hash_keyword.hexdigest())
            else:
                for kwdict in tjob.list_keyword:
                    self._test_kwdict(kwdict, expected)

        if "count" in result:
            expected = result["count"]
            if not isinstance(expected, str):
                # assume integer
                self.assertEqual(len(tjob.list_url), expected)
            else:
                # the regex check restricts what reaches eval() to a
                # comparison operator followed by an integer literal
                self.assertRegex(expected, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
                expr = "{} {}".format(len(tjob.list_url), expected)
                self.assertTrue(eval(expr), msg=expr)

        if "pattern" in result:
            self.assertGreater(len(tjob.list_url), 0)
            for file_url in tjob.list_url:
                self.assertRegex(file_url, result["pattern"])
예제 #4
0
 def test_interpolate(self):
     """Check config.interpolate() results before and after setting "d"."""
     self.assertEqual(config.interpolate(["a"]), "1")
     self.assertEqual(config.interpolate(["b", "a"]), "1")
     self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
     # expects the second argument ("2") back -- presumably because no "d"
     # value is configured at this point
     self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
     config.set(["d"], 123)
     # once the top-level "d" is set, it is expected instead of "2"
     self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
     self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
예제 #5
0
 def test_interpolate(self):
     """Verify config.interpolate() with and without a top-level "d" value."""
     self.assertEqual(config.interpolate(["a"]), "1")
     self.assertEqual(config.interpolate(["b", "a"]), "1")
     self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
     # the second argument ("2") is expected back here -- presumably it
     # acts as default while "d" is not configured
     self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
     config.set(["d"], 123)
     # after setting the top-level "d", 123 is expected for both paths
     self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
     self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
예제 #6
0
 def setUp(self):
     """Set the cache file to ":memory:" and install the test credentials."""
     account = "gallerydl"
     address = "*****@*****.**"
     settings = (
         (("cache", "file"), ":memory:"),
         (("extractor", "username"), account),
         (("extractor", "password"), account),
         (("extractor", "nijie", "username"), address),
         (("extractor", "seiga", "username"), address),
     )
     for path, value in settings:
         config.set(path, value)
예제 #7
0
    def _run_test(self, extr, url, result):
        """Run a TestJob for 'url' and check it against 'result'.

        Args:
            extr: Extractor class expected to be selected for 'url'.
            url: URL to run the job on.
            result: Falsy to only check the extractor class, otherwise a
                mapping that may contain "options", "content",
                "exception", "url", "keyword", "count" and "pattern".
        """
        if result:
            if "options" in result:
                # options are given as ("dotted.config.path", value) pairs
                for key, value in result["options"]:
                    config.set(key.split("."), value)
            content = "content" in result
        else:
            content = False

        tjob = job.TestJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            self.assertRaises(result["exception"], tjob.run)
            return

        try:
            tjob.run()
        except exception.HttpError as exc:
            try:
                # skip (instead of fail) on server-side 5xx responses
                if 500 <= exc.args[0].response.status_code < 600:
                    self.skipTest(exc)
            except (AttributeError, IndexError):
                # no args or no response attached: fall through and
                # re-raise the original HttpError unchanged
                pass
            raise

        # test archive-id uniqueness
        self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))

        # test extraction results
        if "url" in result:
            self.assertEqual(result["url"], tjob.hash_url.hexdigest())

        if "content" in result:
            self.assertEqual(result["content"], tjob.hash_content.hexdigest())

        if "keyword" in result:
            keyword = result["keyword"]
            if isinstance(keyword, dict):
                for kwdict in tjob.list_keyword:
                    self._test_kwdict(kwdict, keyword)
            else:  # assume SHA1 hash
                self.assertEqual(keyword, tjob.hash_keyword.hexdigest())

        if "count" in result:
            count = result["count"]
            if isinstance(count, str):
                # the regex check restricts what reaches eval() to a
                # comparison operator followed by an integer literal
                self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
                expr = "{} {}".format(len(tjob.list_url), count)
                self.assertTrue(eval(expr), msg=expr)
            else:  # assume integer
                self.assertEqual(len(tjob.list_url), count)

        if "pattern" in result:
            # distinct loop name so the 'url' parameter is not shadowed
            for file_url in tjob.list_url:
                self.assertRegex(file_url, result["pattern"])
예제 #8
0
 def _test_warning(self, filename, exc):
     """Assert that using 'filename' for CKEY logs one "cookies: %s" warning.

     The "test:" extractor must end up with an empty cookie jar, and the
     warning's argument must be an instance of 'exc'.
     """
     config.set(CKEY, filename)
     logger = logging.getLogger("test")
     with mock.patch.object(logger, "warning") as warn:
         jar = extractor.find("test:").session.cookies
         self.assertEqual(len(jar), 0)
         self.assertEqual(warn.call_count, 1)
         # positional arguments of the single warning() call
         positional = warn.call_args[0]
         self.assertEqual(positional[0], "cookies: %s")
         self.assertIsInstance(positional[1], exc)
예제 #9
0
 def _test_warning(self, filename, exc):
     """Assert that a "cookies" value of 'filename' logs exactly one warning.

     The "test:" extractor must end up with an empty cookie jar, the
     warning format must be "cookies: %s" and its argument must be an
     instance of 'exc'.
     """
     config.set((), "cookies", filename)
     logger = logging.getLogger("test")
     with mock.patch.object(logger, "warning") as warn:
         jar = extractor.find("test:").session.cookies
         self.assertEqual(len(jar), 0)
         self.assertEqual(warn.call_count, 1)
         # positional arguments of the single warning() call
         positional = warn.call_args[0]
         self.assertEqual(positional[0], "cookies: %s")
         self.assertIsInstance(positional[1], exc)
예제 #10
0
    def test_cookiefile(self):
        """Set CKEY to self.cookiefile and check the single resulting cookie."""
        config.set(CKEY, self.cookiefile)

        jar = extractor.find("test:").session.cookies
        self.assertEqual(len(jar), 1)

        cookie = next(iter(jar))
        expected = (
            ("domain", ".example.org"),
            ("path", "/"),
            ("name", "NAME"),
            ("value", "VALUE"),
        )
        for attribute, value in expected:
            self.assertEqual(getattr(cookie, attribute), value)
예제 #11
0
    def test_cookiefile(self):
        """Use self.cookiefile as "cookies" value and check the one cookie."""
        config.set((), "cookies", self.cookiefile)

        jar = extractor.find("test:").session.cookies
        self.assertEqual(len(jar), 1)

        cookie = next(iter(jar))
        expected = (
            ("domain", ".example.org"),
            ("path", "/"),
            ("name", "NAME"),
            ("value", "VALUE"),
        )
        for attribute, value in expected:
            self.assertEqual(getattr(cookie, attribute), value)
예제 #12
0
    def test_private(self):
        """With output.private enabled, entries 1-3 must carry "_fallback"."""
        config.set(("output", ), "private", True)
        tjob = self.jobclass(
            TestExtractor.from_url("test:"), file=io.StringIO())

        tjob.run()

        for num in range(1, 4):
            # third element of each data entry is the keyword dict
            kwdict = tjob.data[num][2]
            fallback = ("https://example.org/alt/{}.jpg".format(num), )
            self.assertEqual(kwdict["_fallback"], fallback)
예제 #13
0
    def test_sleep(self):
        """Check that "sleep-extractor" controls the time.sleep() call of a run."""
        extr = TestExtractor.from_url("test:")
        tjob = self.jobclass(extr, file=io.StringIO())

        # a value of 123 must result in exactly one sleep(123) call
        config.set((), "sleep-extractor", 123)
        with patch("time.sleep") as sleep:
            tjob.run()
        sleep.assert_called_once_with(123)

        # a value of 0 must not call sleep() at all
        config.set((), "sleep-extractor", 0)
        with patch("time.sleep") as sleep:
            tjob.run()
        sleep.assert_not_called()
예제 #14
0
 def test_cookie_login(self):
     """With the listed cookies set, login() must not call _login_impl()."""
     required_cookies = {
         "exhentai": ("ipb_member_id", "ipb_pass_hash"),
         "nijie": ("nemail", "nlogin"),
         "sankaku": ("login", "pass_hash"),
         "seiga": ("user_session",),
     }
     for category, names in required_cookies.items():
         # every cookie gets the same dummy value
         config.set(CKEY, dict.fromkeys(names, "value"))
         extr = _get_extractor(category)
         with mock.patch.object(extr, "_login_impl") as login_impl:
             extr.login()
             login_impl.assert_not_called()
예제 #15
0
 def test_cookie_login(self):
     """With the listed cookies set, login() must not call _login_impl()."""
     required_cookies = {
         "exhentai": ("ipb_member_id", "ipb_pass_hash"),
         "idolcomplex": ("login", "pass_hash"),
         "nijie": ("nemail", "nlogin"),
         "seiga": ("user_session", ),
     }
     for category, names in required_cookies.items():
         # every cookie gets the same dummy value
         config.set((), "cookies", dict.fromkeys(names, "value"))
         extr = _get_extractor(category)
         with mock.patch.object(extr, "_login_impl") as login_impl:
             extr.login()
             login_impl.assert_not_called()
예제 #16
0
    def test_num_string(self):
        """Check the effect of the output "num-to-str" option on a job run."""
        extr = TestExtractor.from_url("test:")
        tjob = self.jobclass(extr, file=io.StringIO())

        # without the option, number_to_string() must never be called
        with patch("gallery_dl.util.number_to_string") as nts:
            tjob.run()
        self.assertEqual(len(nts.call_args_list), 0)

        # with the option enabled, 52 calls are expected for this extractor
        config.set(("output", ), "num-to-str", True)
        with patch("gallery_dl.util.number_to_string") as nts:
            tjob.run()
        self.assertEqual(len(nts.call_args_list), 52)

        # unpatched run: the last message is a Url whose "num" is a string
        tjob.run()
        self.assertEqual(tjob.data[-1][0], Message.Url)
        self.assertEqual(tjob.data[-1][2]["num"], "3")
    def __init__(self,
                 subreddit,
                 path,
                 sort_type,
                 limit,
                 previous_id=None,
                 debug=False,
                 disable_db=False,
                 disable_im=False):
        """Set up logging, image matching, database access and extensions.

        The first six arguments are passed on unchanged to the parent
        constructor.  'disable_im' skips creating the ImageMatchManager
        and 'disable_db' skips creating the TPTDatabaseManager; in both
        cases the corresponding attribute ('im' / 'db') is not assigned
        here.
        """
        # call constructor of GetSubredditSubmissions class passing args
        super().__init__(subreddit, path, sort_type, limit, previous_id, debug)

        self.log = logging.getLogger('DownloadSubredditSubmissions')
        self.Exceptions = (
            FileExistsException,
            FileExistsError,
            ImgurException,
            HTTPError,
            ValueError,
            SSLError,
            NoExtractorError,
            TurboPalmTreeException,
        )

        self.disable_im = disable_im
        if not disable_im:
            # elastic search variables
            self.es_index = 'tpt_images'
            self.es_doc_type = 'image'
            # object used to add, search and compare images in
            # elasticsearch for duplicate deletion
            self.im = ImageMatchManager(
                index=self.es_index,
                doc_type=self.es_doc_type,
                distance_cutoff=0.40,
            )

        self.disable_db = disable_db
        if not disable_db:
            # get db manager object for inserting and saving data to db;
            # fall back to "db disabled" if it cannot be created
            try:
                self.db = TPTDatabaseManager()
            except sqlite3.OperationalError as err:
                self.log.error("{}: {}".format(type(err).__name__, err))
                self.disable_db = True

        # used to check if url ends with any of these
        self.image_extensions = ('.png', '.jpg', '.jpeg', '.gif')
        self.media_extensions = self.image_extensions + ('.webm', '.mp4')

        # prevent gallery-dl module from printing to std output
        gallery_dl_config.set(("output", ), "mode", "null")
예제 #18
0
    def test_extractor_filter(self):
        """Check _build_extractor_filter() by default and with black/whitelist."""
        extr = TestExtractor.from_url("test:")
        tjob = self.jobclass(extr)

        # no blacklist/whitelist set by this test yet
        func = tjob._build_extractor_filter()
        self.assertEqual(func(TestExtractor), False)
        self.assertEqual(func(TestExtractorParent), False)
        self.assertEqual(func(TestExtractorAlt), True)

        # blacklist given as ":<subcategory>"
        config.set((), "blacklist", ":test_subcategory")
        func = tjob._build_extractor_filter()
        self.assertEqual(func(TestExtractor), False)
        self.assertEqual(func(TestExtractorParent), True)
        self.assertEqual(func(TestExtractorAlt), False)

        # whitelist given as "<category>:<subcategory>"; the blacklist set
        # above is still in place when the filter is rebuilt
        config.set((), "whitelist", "test_category:test_subcategory")
        func = tjob._build_extractor_filter()
        self.assertEqual(func(TestExtractor), True)
        self.assertEqual(func(TestExtractorParent), False)
        self.assertEqual(func(TestExtractorAlt), False)
예제 #19
0
 def test_set(self):
     """Check that values stored with config.set() are returned by config.get()."""
     config.set((), "c", [1, 2, 3])
     config.set(("b", ), "c", [1, 2, 3])
     # the value may also be passed by keyword
     config.set(("e", "f"), "g", value=234)
     self.assertEqual(config.get((), "c"), [1, 2, 3])
     self.assertEqual(config.get(("b", ), "c"), [1, 2, 3])
     self.assertEqual(config.get(("e", "f"), "g"), 234)
예제 #20
0
    def _run_test(self, extr, url, result):
        """Run a TestJob for 'url' and check it against 'result'.

        Args:
            extr: Extractor class expected to be selected for 'url'.
            url: URL to run the job on.
            result: Falsy to only check the extractor class, otherwise a
                mapping that may contain "options", "content",
                "exception", "url", "keyword", "count" and "pattern".
        """
        if result:
            if "options" in result:
                # options are given as ("dotted.config.path", value) pairs
                for key, value in result["options"]:
                    config.set(key.split("."), value)
            content = "content" in result
        else:
            content = False

        tjob = job.TestJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            self.assertRaises(result["exception"], tjob.run)
            return

        try:
            tjob.run()
        except exception.HttpError as exc:
            try:
                # skip (instead of fail) on server-side 5xx responses
                if 500 <= exc.args[0].response.status_code < 600:
                    self.skipTest(exc)
            except (AttributeError, IndexError):
                # no args or no response attached: fall through and
                # re-raise the original HttpError unchanged
                pass
            raise

        if "url" in result:
            self.assertEqual(result["url"], tjob.hash_url.hexdigest())
        if "keyword" in result:
            self.assertEqual(result["keyword"], tjob.hash_keyword.hexdigest())
        if "content" in result:
            self.assertEqual(result["content"], tjob.hash_content.hexdigest())
        if "count" in result:
            self.assertEqual(len(tjob.urllist), int(result["count"]))
        if "pattern" in result:
            # distinct loop name so the 'url' parameter is not shadowed
            for file_url in tjob.urllist:
                self.assertRegex(file_url, result["pattern"])
예제 #21
0
    def test_interpolate(self):
        """Check config.interpolate() for several paths, keys and defaults."""
        # "a" is expected to be 1 for every path used here
        self.assertEqual(config.interpolate((), "a"), 1)
        self.assertEqual(config.interpolate(("b", ), "a"), 1)
        self.assertEqual(config.interpolate(("b", "b"), "a"), 1)

        # "c" is expected to differ depending on the path
        self.assertEqual(config.interpolate((), "c"), None)
        self.assertEqual(config.interpolate(("b", ), "c"), "text")
        self.assertEqual(config.interpolate(("b", "b"), "c"), [8, 9])

        # "g" lookups are expected to yield None, or the third argument
        # when one is given
        self.assertEqual(config.interpolate(("a", ), "g"), None)
        self.assertEqual(config.interpolate(("a", "a"), "g"), None)
        self.assertEqual(config.interpolate(("e", "f"), "g"), None)
        self.assertEqual(config.interpolate(("e", "f"), "g", 4), 4)

        # "d": the third argument is expected until a top-level value is set
        self.assertEqual(config.interpolate(("b", ), "d", 1), 1)
        self.assertEqual(config.interpolate(("d", ), "d", 1), 1)
        config.set((), "d", 2)
        self.assertEqual(config.interpolate(("b", ), "d", 1), 2)
        self.assertEqual(config.interpolate(("d", ), "d", 1), 2)
        # setting "d" under ("b", ) as well must not change the results
        config.set(("b", ), "d", 3)
        self.assertEqual(config.interpolate(("b", ), "d", 1), 2)
        self.assertEqual(config.interpolate(("d", ), "d", 1), 2)
예제 #22
0
    def test_accumulate(self):
        """Check that config.accumulate() concatenates "l" lists along a path."""
        # nothing set yet: an empty list is expected
        self.assertEqual(config.accumulate((), "l"), [])

        config.set(()        , "l", [5, 6])
        config.set(("c",)    , "l", [3, 4])
        config.set(("c", "c"), "l", [1, 2])
        # expected order: deepest path level first, top level last
        self.assertEqual(
            config.accumulate((), "l")        , [5, 6])
        self.assertEqual(
            config.accumulate(("c",), "l")    , [3, 4, 5, 6])
        self.assertEqual(
            config.accumulate(("c", "c"), "l"), [1, 2, 3, 4, 5, 6])

        # a None value and an unset value must both contribute nothing
        config.set(("c",), "l", None)
        config.unset(("c", "c"), "l")
        self.assertEqual(
            config.accumulate((), "l")        , [5, 6])
        self.assertEqual(
            config.accumulate(("c",), "l")    , [5, 6])
        self.assertEqual(
            config.accumulate(("c", "c"), "l"), [5, 6])
예제 #23
0
    def test_custom(self):
        """Check the captured output when custom config values are set.

        Custom "filename", "directory" and "sleep-request" values must be
        listed as "(custom)" entries next to the "(default)" ones.
        """
        config.set((), "filename", "custom")
        config.set((), "directory", ("custom",))
        config.set((), "sleep-request", 321)
        extr = TestExtractor.from_url("test:")
        # expected to show up as the default request interval below
        extr.request_interval = 123.456

        self.assertEqual(self._capture_stdout(extr), """\
Category / Subcategory
  "test_category" / "test_subcategory"
Filename format (custom):
  "custom"
Filename format (default):
  "test_{filename}.{extension}"
Directory format (custom):
  ["custom"]
Directory format (default):
  ["{category}"]
Request interval (custom):
  321
Request interval (default):
  123.456
""")
예제 #24
0
 def test_set(self):
     """Check that values stored with config.set() are returned by config.get()."""
     config.set(["b", "c"], [1, 2, 3])
     # the value may also be passed by keyword
     config.set(["e", "f", "g"], value=234)
     self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
     self.assertEqual(config.get(["e", "f", "g"]), 234)
예제 #25
0
 def setUp(self):
     """Store a two-entry cookie dict and set it as the "cookies" value."""
     cookies = {"NAME1": "VALUE1", "NAME2": "VALUE2"}
     self.cdict = cookies
     config.set((), "cookies", cookies)
예제 #26
0
 def setUp(self):
     """Load the configuration, then set the cache file to ":memory:"."""
     config.load()
     cache_file_path = ("cache", "file")
     config.set(cache_file_path, ":memory:")
예제 #27
0
 def setUp(self):
     """Store a two-entry cookie dict and set it as the CKEY config value."""
     cookies = {"NAME1": "VALUE1", "NAME2": "VALUE2"}
     self.cdict = cookies
     config.set(CKEY, cookies)
예제 #28
0
 def test_set(self):
     """Store two values with config.set() and read them back with config.get()."""
     config.set(["b", "c"], [1, 2, 3])
     # keyword form of the value argument
     config.set(["e", "f", "g"], value=234)
     self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
     self.assertEqual(config.get(["e", "f", "g"]), 234)
예제 #29
0
def setup_test_config():
    """Clear the global config and install the values used by the tests."""
    name = "gallerydl"
    email = "*****@*****.**"

    # (config path, value) pairs, applied in this order
    settings = (
        (("cache", "file"), ":memory:"),
        (("downloader", "part"), False),
        (("extractor", "timeout"), 60),
        (("extractor", "username"), name),
        (("extractor", "password"), name),
        (("extractor", "nijie", "username"), email),
        (("extractor", "seiga", "username"), email),
        (("extractor", "danbooru", "username"), None),
        (("extractor", "twitter", "username"), None),
        (("extractor", "mangoxo", "password"), "VZ8DL3983u"),

        (("extractor", "deviantart", "client-id"), "7777"),
        (("extractor", "deviantart", "client-secret"),
         "ff14994c744d9208e5caeec7aab4a026"),

        (("extractor", "tumblr", "api-key"),
         "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6"),
        (("extractor", "tumblr", "api-secret"),
         "6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj"),
        (("extractor", "tumblr", "access-token"),
         "N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG"),
        (("extractor", "tumblr", "access-token-secret"),
         "sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O"),
    )

    config.clear()
    for path, value in settings:
        config.set(path, value)
예제 #30
0
def setup_test_config():
    """Clear the global config and install the values used by the tests."""
    name = "gallerydl"
    email = "*****@*****.**"

    # (config path, value) pairs, applied in this order
    settings = (
        (("cache", "file"), ":memory:"),
        (("downloader", "part"), False),
        (("downloader", "adjust-extensions"), False),
        (("extractor", "timeout"), 60),
        (("extractor", "username"), name),
        (("extractor", "password"), name),
        (("extractor", "nijie", "username"), email),
        (("extractor", "seiga", "username"), email),

        # these categories get an explicit None username
        (("extractor", "danbooru", "username"), None),
        (("extractor", "instagram", "username"), None),
        (("extractor", "imgur", "username"), None),
        (("extractor", "twitter", "username"), None),

        (("extractor", "mangoxo", "username"), "LiQiang3"),
        (("extractor", "mangoxo", "password"), "5zbQF10_5u25259Ma"),

        (("extractor", "deviantart", "client-id"), "7777"),
        (("extractor", "deviantart", "client-secret"),
         "ff14994c744d9208e5caeec7aab4a026"),

        (("extractor", "tumblr", "api-key"),
         "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6"),
        (("extractor", "tumblr", "api-secret"),
         "6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj"),
        (("extractor", "tumblr", "access-token"),
         "N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG"),
        (("extractor", "tumblr", "access-token-secret"),
         "sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O"),
    )

    config.clear()
    for path, value in settings:
        config.set(path, value)
예제 #31
0
 def setUpClass(cls):
     """Create the shared "test:" extractor and a temporary base directory."""
     extr = extractor.find("test:")
     extr.log.job = None
     cls.extractor = extr

     tmpdir = tempfile.TemporaryDirectory()
     cls.dir = tmpdir
     cls.fnum = 0
     config.set((), "base-directory", tmpdir.name)
예제 #32
0
 def setUpClass(cls):
     """Create a temporary base directory and a shared FakeJob instance."""
     tmpdir = tempfile.TemporaryDirectory()
     cls.dir = tmpdir
     cls.fnum = 0
     config.set((), "base-directory", tmpdir.name)
     cls.job = FakeJob()
예제 #33
0
 def setUpClass(cls):
     """Create the shared "test:" extractor and a temporary base directory."""
     cls.extractor = extractor.find("test:")
     tmpdir = tempfile.TemporaryDirectory()
     cls.dir = tmpdir
     config.set(("base-directory", ), tmpdir.name)
예제 #34
0
    def _run_test(self, extr, url, result):
        """Run a ResultJob for 'url' and verify it against 'result'.

        'extr' is the extractor class that must be selected for 'url'.
        'result' is either falsy (only the extractor class is verified) or
        a mapping that may contain "options", "range", "content",
        "exception", "url", "keyword", "count" and "pattern".
        """
        content = False
        if result:
            if "options" in result:
                # options are given as ("dotted.config.path", value) pairs
                for option, value in result["options"]:
                    config.set(option.split("."), value)
            if "range" in result:
                # the same range is used for images and chapters
                for name in ("image-range", "chapter-range"):
                    config.set((name,), result["range"])
            content = "content" in result

        tjob = ResultJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            self.assertRaises(result["exception"], tjob.run)
            return

        try:
            tjob.run()
        except exception.StopExtraction:
            pass
        except exception.HttpError as exc:
            # skip instead of failing when the message starts with "5xx: "
            if re.match(r"5\d\d: ", str(exc)):
                self.skipTest(exc)
            raise

        # archive IDs must be unique
        self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))

        # an '_extractor' entry must be able to build an extractor of that
        # class from the queued URL
        if tjob.queue:
            for queued_url, kwdict in zip(tjob.list_url, tjob.list_keyword):
                if "_extractor" not in kwdict:
                    continue
                extr_class = kwdict["_extractor"]
                child = extr_class.from_url(queued_url)
                self.assertIsInstance(child, extr_class)
                self.assertEqual(child.url, queued_url)

        # extraction results
        if "url" in result:
            self.assertEqual(result["url"], tjob.hash_url.hexdigest())

        if "content" in result:
            self.assertEqual(result["content"], tjob.hash_content.hexdigest())

        if "keyword" in result:
            expected = result["keyword"]
            if not isinstance(expected, dict):
                # anything but a dict is assumed to be a SHA1 hash
                self.assertEqual(expected, tjob.hash_keyword.hexdigest())
            else:
                for kwdict in tjob.list_keyword:
                    self._test_kwdict(kwdict, expected)

        if "count" in result:
            expected = result["count"]
            if not isinstance(expected, str):
                # assume integer
                self.assertEqual(len(tjob.list_url), expected)
            else:
                # the regex check restricts what reaches eval() to a
                # comparison operator followed by an integer literal
                self.assertRegex(expected, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
                expr = "{} {}".format(len(tjob.list_url), expected)
                self.assertTrue(eval(expr), msg=expr)

        if "pattern" in result:
            self.assertGreater(len(tjob.list_url), 0)
            for file_url in tjob.list_url:
                self.assertRegex(file_url, result["pattern"])
예제 #35
0
def setup_test_config():
    """Clear the global config and install the values used by the tests."""
    name = "gallerydl"
    email = "*****@*****.**"
    email2 = "*****@*****.**"

    # (config path, key, value) triples, applied in list order
    settings = [
        (("cache", ), "file", None),
        (("downloader", ), "part", False),
        (("downloader", ), "adjust-extensions", False),
        (("extractor", ), "timeout", 60),
        (("extractor", ), "username", name),
        (("extractor", ), "password", name),

        (("extractor", "nijie"), "username", email),
        (("extractor", "seiga"), "username", email),
        (("extractor", "pinterest"), "username", email2),
        (("extractor", "pinterest"), "username", None),  # login broken

        (("extractor", "newgrounds"), "username", "d1618111"),
        (("extractor", "newgrounds"), "password", "d1618111"),

        (("extractor", "mangoxo"), "username", "LiQiang3"),
        (("extractor", "mangoxo"), "password", "5zbQF10_5u25259Ma"),
    ]

    # these categories get an explicit None username
    settings += [
        (("extractor", category), "username", None)
        for category in ("danbooru", "instagram", "twitter",
                         "subscribestar", "e621", "inkbunny")
    ]

    settings += [
        (("extractor", "mastodon.social"), "access-token",
         "Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ"),

        (("extractor", "deviantart"), "client-id", "7777"),
        (("extractor", "deviantart"), "client-secret",
         "ff14994c744d9208e5caeec7aab4a026"),

        (("extractor", "tumblr"), "api-key",
         "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6"),
        (("extractor", "tumblr"), "api-secret",
         "6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj"),
        (("extractor", "tumblr"), "access-token",
         "N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG"),
        (("extractor", "tumblr"), "access-token-secret",
         "sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O"),
    ]

    config.clear()
    for path, key, value in settings:
        config.set(path, key, value)
예제 #36
0
 def setUp(self):
     """Populate the config with "a" and "c" values on several path levels."""
     fixtures = (
         ((), "a", 1),
         (("b", ), "a", 2),
         (("b", "b"), "a", 3),
         (("b", ), "c", "text"),
         (("b", "b"), "c", [8, 9]),
     )
     for path, key, value in fixtures:
         config.set(path, key, value)
예제 #37
0

# setup target directory (named after today's date)

path = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)


# run a DataJob for every test and write its output to a separate file
for idx, extr, url, result in tests:

    # filename
    name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx)
    print(name)

    # config values
    setup_test_config()

    if "options" in result:
        # options are given as ("dotted.config.path", value) pairs
        for key, value in result["options"]:
            config.set(key.split("."), value)
    if "range" in result:
        config.set(("image-range",), result["range"])
        config.set(("chapter-range",), result["range"])

    # write test data
    # The file is opened as UTF-8 explicitly: with ensure_ascii=False the
    # output may contain non-ASCII characters, which the platform's default
    # encoding is not guaranteed to be able to represent.
    filepath = os.path.join(path, name)
    try:
        with open(filepath, "w", encoding="utf-8") as outfile:
            job.DataJob(url, file=outfile, ensure_ascii=False).run()
    except KeyboardInterrupt:
        sys.exit()
예제 #38
0
    def _run_test(self, extr, url, result):
        """Run a ResultJob for 'url' and verify it against 'result'.

        'extr' is the extractor class that must be selected for 'url'.
        'result' is either falsy (only the extractor class is verified) or
        a mapping that may contain "options", "range", "content",
        "exception", "url", "keyword", "count" and "pattern".
        """
        if result:
            if "options" in result:
                # options are given as ("dotted.config.path", value) pairs
                for key, value in result["options"]:
                    config.set(key.split("."), value)
            if "range" in result:
                # the same range is used for images and chapters
                config.set(("image-range", ), result["range"])
                config.set(("chapter-range", ), result["range"])
            content = "content" in result
        else:
            content = False

        tjob = ResultJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            with self.assertRaises(result["exception"]):
                tjob.run()
            return
        try:
            tjob.run()
        except exception.StopExtraction:
            pass
        except exception.HttpError as exc:
            # record and skip on "5xx: " messages and read timeouts;
            # every other HTTP error is re-raised
            exc = str(exc)
            if re.match(r"5\d\d: ", exc) or \
                    re.search(r"\bRead timed out\b", exc):
                self._skipped.append((url, exc))
                self.skipTest(exc)
            raise

        # test archive-id uniqueness
        self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))

        # test '_extractor' entries
        # NOTE: this loop rebinds the 'url' and 'extr' parameters
        if tjob.queue:
            for url, kwdict in zip(tjob.list_url, tjob.list_keyword):
                if "_extractor" in kwdict:
                    extr = kwdict["_extractor"].from_url(url)
                    self.assertIsInstance(extr, kwdict["_extractor"])
                    self.assertEqual(extr.url, url)

        # test extraction results
        if "url" in result:
            self.assertEqual(result["url"], tjob.hash_url.hexdigest())

        if "content" in result:
            self.assertEqual(result["content"], tjob.hash_content.hexdigest())

        if "keyword" in result:
            keyword = result["keyword"]
            if isinstance(keyword, dict):
                for kwdict in tjob.list_keyword:
                    self._test_kwdict(kwdict, keyword)
            else:  # assume SHA1 hash
                self.assertEqual(keyword, tjob.hash_keyword.hexdigest())

        if "count" in result:
            count = result["count"]
            if isinstance(count, str):
                # the regex check restricts what reaches eval() to a
                # comparison operator followed by an integer literal
                self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
                expr = "{} {}".format(len(tjob.list_url), count)
                self.assertTrue(eval(expr), msg=expr)
            else:  # assume integer
                self.assertEqual(len(tjob.list_url), count)

        if "pattern" in result:
            # an empty URL list must not pass the pattern check vacuously
            self.assertGreater(len(tjob.list_url), 0)
            for url in tjob.list_url:
                self.assertRegex(url, result["pattern"])
예제 #39
0
 def setUp(self):
     """Install the cache, downloader and extractor values used by the tests."""
     account = "gallerydl"
     address = "*****@*****.**"
     # (config path, value) pairs, applied in this order
     settings = (
         (("cache", "file"), ":memory:"),
         (("downloader", "part"), False),
         (("extractor", "username"), account),
         (("extractor", "password"), account),
         (("extractor", "nijie", "username"), address),
         (("extractor", "seiga", "username"), address),
         (("extractor", "deviantart", "client-id"), "7777"),
         (("extractor", "deviantart", "client-secret"),
          "ff14994c744d9208e5caeec7aab4a026"),
         (("extractor", "tumblr", "api-key"),
          "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6"),
     )
     for path, value in settings:
         config.set(path, value)
예제 #40
0
    def _run_test(self, extr, url, result):
        """Run a ResultJob for 'url' and verify it against 'result'.

        'extr' is the extractor class that must be selected for 'url'.
        'result' is either falsy (only the extractor class is verified) or
        a mapping that may contain "options", "range", "content",
        "exception", "archive", "url", "keyword", "count" and "pattern".
        """
        content = False
        if result:
            if "options" in result:
                for option, value in result["options"]:
                    # "a.b.c" is split into the path ["a", "b"] and key "c"
                    *path, name = option.split(".")
                    config.set(path, name, value)
            if "range" in result:
                # the same range is used for images and chapters
                for name in ("image-range", "chapter-range"):
                    config.set((), name, result["range"])
            content = "content" in result

        tjob = ResultJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            with self.assertRaises(result["exception"]):
                tjob.run()
            return

        try:
            tjob.run()
        except exception.StopExtraction:
            pass
        except exception.HttpError as exc:
            # record and skip on "'5xx " messages and read timeouts;
            # every other HTTP error is re-raised
            message = str(exc)
            if (re.match(r"'5\d\d ", message) or
                    re.search(r"\bRead timed out\b", message)):
                self._skipped.append((url, message))
                self.skipTest(message)
            raise

        # archive IDs must be unique unless the test opts out
        if result.get("archive", True):
            self.assertEqual(
                len(set(tjob.archive_list)),
                len(tjob.archive_list),
                "archive-id uniqueness",
            )

        if not tjob.queue:
            # every keyword dict needs an 'extension' entry
            for kwdict in tjob.kwdict_list:
                self.assertIn("extension", kwdict)
        else:
            # an '_extractor' entry must be able to build an extractor of
            # that class from the queued URL
            for queued_url, kwdict in zip(tjob.url_list, tjob.kwdict_list):
                if "_extractor" not in kwdict:
                    continue
                extr_class = kwdict["_extractor"]
                child = extr_class.from_url(queued_url)
                self.assertIsInstance(child, extr_class)
                self.assertEqual(child.url, queued_url)

        # extraction results
        if "url" in result:
            self.assertEqual(result["url"], tjob.url_hash.hexdigest())

        if "content" in result:
            expected = result["content"]
            digest = tjob.content_hash.hexdigest()
            # a string must match exactly; anything else is assumed to be
            # an iterable of acceptable digests
            if isinstance(expected, str):
                check = self.assertEqual
            else:
                check = self.assertIn
            check(digest, expected, "content")

        if "keyword" in result:
            expected = result["keyword"]
            if not isinstance(expected, dict):
                # anything but a dict is assumed to be a SHA1 hash
                self.assertEqual(expected, tjob.kwdict_hash.hexdigest())
            else:
                for kwdict in tjob.kwdict_list:
                    self._test_kwdict(kwdict, expected)

        if "count" in result:
            expected = result["count"]
            if not isinstance(expected, str):
                # assume integer
                self.assertEqual(len(tjob.url_list), expected)
            else:
                # the regex check restricts what reaches eval() to a
                # comparison operator followed by an integer literal
                self.assertRegex(expected, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
                expr = "{} {}".format(len(tjob.url_list), expected)
                self.assertTrue(eval(expr), msg=expr)

        if "pattern" in result:
            self.assertGreater(len(tjob.url_list), 0)
            for file_url in tjob.url_list:
                self.assertRegex(file_url, result["pattern"])
예제 #41
0
 def setUpClass(cls):
     """Create the shared "test:" extractor and a temporary base directory."""
     cls.extractor = extractor.find("test:")
     tmpdir = tempfile.TemporaryDirectory()
     cls.dir = tmpdir
     cls.fnum = 0
     config.set(("base-directory",), tmpdir.name)