Ejemplo n.º 1
0
def main():
    """Run a test job per URL and print the resulting test-data entry.

    Command-line arguments:
        --content   forwarded to ``job.TestJob`` as its ``content`` keyword
        --recreate  interpret the positional arguments as extractor
                    categories and process the first element of every
                    ``test`` entry of the matching extractors instead
        urls        zero or more URLs (or categories with ``--recreate``)

    For each URL the extractor's class name is printed, followed by one
    line built from ``TESTDATA_FMT`` (URL plus the hex digests of the URL,
    keyword and content hashes).  If running the job raises, the line is
    built from ``TESTDATA_EXCEPTION_FMT`` with the exception's class name.
    """
    argparser = argparse.ArgumentParser()
    for flag in ("--content", "--recreate"):
        argparser.add_argument(flag, action="store_true")
    argparser.add_argument("urls", nargs="*")
    options = argparser.parse_args()

    if options.recreate:
        # Positional arguments name categories; collect their test URLs.
        targets = []
        for extr in extractor.extractors():
            if extr.category not in options.urls:
                continue
            for test in extr.test:
                targets.append(test[0])
    else:
        targets = options.urls

    config.load()
    for target in targets:
        tjob = job.TestJob(target, content=options.content)
        try:
            tjob.run()
        except Exception as exc:
            # A failing job is recorded by the name of the exception class.
            template = TESTDATA_EXCEPTION_FMT
            fields = (exc.__class__.__name__,)
        else:
            template = TESTDATA_FMT
            fields = (
                tjob.hash_url.hexdigest(),
                tjob.hash_keyword.hexdigest(),
                tjob.hash_content.hexdigest(),
            )
        print(tjob.extractor.__class__.__name__)
        print(template.format(target, *fields))
Ejemplo n.º 2
0
def main():
    """Run a hash job for every URL on the command line and print the result.

    Command-line arguments:
        --content   forwarded to ``job.HashJob`` as its ``content`` keyword
        urls        zero or more URLs to process

    For each URL the extractor's class name is printed, followed by one
    line built from ``TESTDATA_FMT`` with the URL and the hex digests of
    the URL, keyword and content hashes.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--content", action="store_true")
    argparser.add_argument("urls", nargs="*")
    options = argparser.parse_args()

    config.load()
    for target in options.urls:
        hjob = job.HashJob(target, content=options.content)
        hjob.run()
        print(hjob.extractor.__class__.__name__)
        digests = (
            hjob.hash_url.hexdigest(),
            hjob.hash_keyword.hexdigest(),
            hjob.hash_content.hexdigest(),
        )
        print(TESTDATA_FMT.format(target, *digests))
Ejemplo n.º 3
0
    def test_load(self):
        """Check config.load() with one file, two successive loads, and both at once.

        Two JSON files are written to a temporary directory.  After loading
        the second on top of the first (or both in a single call), values
        present in both come from the second file while values present in
        only one of them remain available.
        """
        after_first = (
            ((), "a", 1),
            (("b", ), "a", 2),
            (("b", ), "c", "text"),
        )
        after_both = (
            ((), "a", 7),
            (("b", ), "a", 8),
            (("b", ), "c", "text"),
            (("b", ), "e", "foo"),
        )

        def check(expectations):
            # Compare each (path, key) lookup against its expected value.
            for path, key, value in expectations:
                self.assertEqual(config.get(path, key), value)

        with tempfile.TemporaryDirectory() as base:
            path1 = os.path.join(base, "cfg1")
            with open(path1, "w") as handle:
                handle.write('{"a": 1, "b": {"a": 2, "c": "text"}}')

            path2 = os.path.join(base, "cfg2")
            with open(path2, "w") as handle:
                handle.write('{"a": 7, "b": {"a": 8, "e": "foo"}}')

            # First file only.
            config.clear()
            config.load((path1, ))
            check(after_first)

            # Second file loaded on top of the first.
            config.load((path2, ))
            check(after_both)

            # Both files in a single call, starting from a cleared config.
            config.clear()
            config.load((path1, path2))
            check(after_both)
Ejemplo n.º 4
0
def main():
    """Run a hash job per URL and print the resulting test-data entry.

    Command-line arguments:
        --content   forwarded to ``job.HashJob`` as its ``content`` keyword
        --recreate  interpret the positional arguments as extractor
                    categories and process the first element of every
                    ``test`` entry of the matching extractors instead
        urls        zero or more URLs (or categories with ``--recreate``)

    For each URL the extractor's class name is printed, followed by one
    line built from ``TESTDATA_FMT`` with the URL and the hex digests of
    the URL, keyword and content hashes.
    """
    argparser = argparse.ArgumentParser()
    for flag in ("--content", "--recreate"):
        argparser.add_argument(flag, action="store_true")
    argparser.add_argument("urls", nargs="*")
    options = argparser.parse_args()

    if options.recreate:
        # Positional arguments name categories; collect their test URLs.
        targets = []
        for extr in extractor.extractors():
            if extr.category not in options.urls:
                continue
            for test in extr.test:
                targets.append(test[0])
    else:
        targets = options.urls

    config.load()
    for target in targets:
        hjob = job.HashJob(target, content=options.content)
        hjob.run()
        print(hjob.extractor.__class__.__name__)
        digests = (
            hjob.hash_url.hexdigest(),
            hjob.hash_keyword.hexdigest(),
            hjob.hash_content.hexdigest(),
        )
        print(TESTDATA_FMT.format(target, *digests))
Ejemplo n.º 5
0
 def setUp(self):
     """Write a small JSON config to a fresh temporary file and load it.

     The file's path is kept in ``self._configfile``.
     """
     handle, cfgpath = tempfile.mkstemp()
     self._configfile = cfgpath
     with os.fdopen(handle, "w") as stream:
         stream.write('{"a": "1", "b": {"c": "text"}}')
     config.load(cfgpath)
Ejemplo n.º 6
0
 def setUp(self):
     """Write a small JSON config to a fresh temporary file and load it.

     The file's path is kept in ``self._configfile`` and handed to
     ``config.load`` as a one-element tuple.
     """
     handle, cfgpath = tempfile.mkstemp()
     self._configfile = cfgpath
     with os.fdopen(handle, "w") as stream:
         stream.write('{"a": "1", "b": {"a": 2, "c": "text"}}')
     config.load((cfgpath,))
Ejemplo n.º 7
0
    def handle_urllist(self, urls, _):
        """Append every entry of *urls* to ``self.urls``.

        The second positional argument is accepted but not used.
        """
        collected = self.urls
        collected.extend(urls)

    def handle_queue(self, url, _):
        """Run a nested ``GetUrlJob`` for *url* at ``self.depth + 1``.

        If creating or running that job raises
        ``exception.NoExtractorError``, *url* is passed to
        ``self._write_unsupported`` instead.  The second positional
        argument is accepted but not used.
        """
        child_depth = self.depth + 1
        try:
            # Construction stays inside the try block so that an error
            # raised while setting up the job is handled the same way.
            child = GetUrlJob(url, self, child_depth)
            child.run()
        except exception.NoExtractorError:
            self._write_unsupported(url)


# Module-level setup, executed once when this module is run/imported:
# load the configuration, then override several options under the
# ("extractor", ) path.
config.load()  # load default config files
# NOTE(review): presumably restricts each job to the first image / first
# chapter -- confirm against the range option documentation.
config.set(("extractor", ), "image-range", "1")
config.set(("extractor", ), "chapter-range", "1")
# NOTE(review): the meaning of the options below is inferred from their
# names only (no downloading, timeout of 10, no verification, sleep of
# 0.01) -- confirm units and semantics.
config.set(("extractor", ), "download", False)
config.set(("extractor", ), "timeout", 10)
config.set(("extractor", ), "verify", False)
config.set(("extractor", ), "sleep", 0.01)

# Module-level URLExtract instance, created once at this point.
url_extractor = URLExtract()


# get cmd name from message
def cmd_from_message(message):
    cmd = None
    if 'entities' in message:
        for e in message['entities']:
Ejemplo n.º 8
0
#!/usr/bin/env python

import sys
import os.path
import datetime

ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
from gallery_dl import extractor, job, config

# Pair every extractor that has a ``test`` attribute with the first element
# of each of its test entries whose second element is truthy.
tests = [
    ([entry[0] for entry in extr.test if entry[1]], extr)
    for extr in extractor.extractors()
    if hasattr(extr, "test")
]

# With command-line arguments present, keep only the extractors whose
# category appears among them.
if sys.argv[1:]:
    tests = [pair for pair in tests if pair[1].category in sys.argv]

# Results go into a directory named after today's date.
path = os.path.join(ROOTDIR, "archive/testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)
config.load()

# One JSON file per test URL: "<category>-<subcategory>-<index>.json".
for urls, extr in tests:
    for i, url in enumerate(urls):
        name = "%s-%s-%d.json" % (extr.category, extr.subcategory, i)
        print(name)
        with open(os.path.join(path, name), "w") as outfile:
            datajob = job.DataJob(url, file=outfile)
            datajob.run()
Ejemplo n.º 9
0
 def setUp(self):
     """Load the configuration, then set ("cache", "file") to ':memory:'."""
     config.load()
     cache_file_key = ("cache", "file")
     config.set(cache_file_key, ":memory:")