예제 #1
0
 def petscan(self,
             categories,
             not_categories=None,
             article=False,
             year=None):
     """Count the pages PetScan finds for a "Die Gartenlaube" query.

     :param categories: iterable of category names that are added as
         positive categories.
     :param not_categories: optional iterable of category names that are
         added as negative categories; skipped when empty or ``None``.
     :param article: when true, search namespace 0 (articles), otherwise
         namespace 102 ("Seite").
     :param year: when given, the base category becomes
         ``"Die Gartenlaube (<year>)"`` instead of ``"Die Gartenlaube"``.
     :return: number of entries in the list returned by ``PetScan.run()``.
     """
     query = PetScan()
     query.set_timeout(120)
     # 0 is the article namespace, 102 the "Seite" namespace.
     query.add_namespace(0 if article else 102)
     query.set_search_depth(5)
     base_category = "Die Gartenlaube"
     if year:
         base_category = "Die Gartenlaube (" + str(year) + ")"
     query.add_positive_category(base_category)
     for wanted in categories:
         query.add_positive_category(wanted)
     # ``None`` and empty containers both mean "no exclusions".
     for unwanted in not_categories or ():
         query.add_negative_category(unwanted)
     self.logger.debug(str(query))
     hits = query.run()
     return len(hits)
예제 #2
0
 def petscan(self, categories, negative_categories):
     """Build and run a PetScan query with union logic.

     Every entry of ``categories`` is registered as a positive category and
     every entry of ``negative_categories`` as a negative one.  The query
     is switched to union logic, logged at debug level and then executed.

     :return: whatever ``PetScan.run()`` returns for the query.
     """
     query = PetScan()
     # Positive categories are registered first, then the negative ones.
     registrations = (
         (query.add_positive_category, categories),
         (query.add_negative_category, negative_categories),
     )
     for register, names in registrations:
         for name in names:
             register(name)
     query.set_logic_union()
     self.logger.debug(query)
     result = query.run()
     return result
예제 #3
0
 def petscan(self, categories: List[str],
             negative_categories: List[str]) -> List[PetscanLemma]:
     """Build and run a PetScan query with union logic.

     :param categories: names registered as positive categories.
     :param negative_categories: names registered as negative categories.
     :return: the result of ``PetScan.run()`` for the assembled query.
     """
     query = PetScan()
     for wanted in categories:
         query.add_positive_category(wanted)
     for unwanted in negative_categories:
         query.add_negative_category(unwanted)
     query.set_logic_union()
     # Log the string form of the query before it is executed.
     rendered_query = str(query)
     self.logger.debug(rendered_query)
     lemmas = query.run()
     return lemmas
예제 #4
0
def crawler_cat_index_site():
    """Collect page titles for all watched works and write them to ``output.txt``.

    Steps:

    1. Query PetScan for the categories in ``watch_themes`` (search depth 4,
       OR logic), excluding the category ``Zeitschrift``, and keep the
       titles of the results.
    2. For every such title, query namespace 102 for pages in any of the
       categories ``Fertig``/``Korrigiert``/``Unkorrigiert``, using the title
       as an "any outlink" filter.
    3. If step 2 finds nothing, look for a namespace-104 page in category
       ``Index`` with the same outlink filter and repeat step 2 with the
       first index page (``<nstext>:<title>``).  Titles without an index
       page are printed.

    All titles found are de-duplicated and written to ``output.txt``
    (UTF-8), one per line, sorted and prefixed with ``Seite:``.  A progress
    line is printed after every work.
    """

    def find_sites(outlink):
        """Return the set of page titles PetScan reports for ``outlink``."""
        # NOTE: this link has a bug on catscan2
        # http://tools.wmflabs.org/catscan2/catscan2.php?project=wikisource&categories=Fertig%0D%0AKorrigiert%0D%0AUnkorrigiert&comb[union]=1&ns[102]=1&outlinks_any=Einige+Bemerkungen+%C3%BCber+die+von+Dr.+Liskovius+ver%C3%B6ffentlichten+Resultate+seiner+%E2%80%9EUntersuchungen+%C3%BCber+den+Einflu%C3%9F+der+verschiedenen+Weite+der+Labialpfeifen+auf+ihre+Tonh%C3%B6he%E2%80%9C&interface_language=de
        searcher_sites = PetScan()
        searcher_sites.add_namespace(102)
        for status_category in ('Fertig', 'Korrigiert', 'Unkorrigiert'):
            searcher_sites.add_positive_category(status_category)
        searcher_sites.set_logic(log_or=True)
        searcher_sites.add_any_outlink(outlink)
        return {site['a']['title'] for site in searcher_sites.run()}

    searcher_werke = PetScan()
    for item in watch_themes:
        searcher_werke.add_positive_category(item)
    searcher_werke.add_negative_category('Zeitschrift')
    searcher_werke.set_search_depth(4)
    searcher_werke.set_logic(log_or=True)
    list_werke = [row['a']['title'] for row in searcher_werke.run()]

    all_sites = set()
    for counter, werk in enumerate(list_werke, start=1):
        sites = find_sites(werk)
        if not sites:
            # Nothing links the work directly; retry through its index page.
            searcher_index = PetScan()
            searcher_index.add_any_outlink(werk)
            searcher_index.add_namespace(104)
            searcher_index.add_positive_category('Index')
            index = searcher_index.run()
            if index:
                sites = find_sites(index[0]['a']['nstext'] + ':' +
                                   index[0]['a']['title'])
            else:
                print(werk)
        # Merge the fallback result as well; otherwise works reachable only
        # through their index page would contribute nothing to the output.
        all_sites |= sites
        print(counter, '/', len(list_werke), ' result:', len(all_sites))

    with open('output.txt', 'w', encoding='utf-8') as f:
        # Sorted so that repeated runs produce a stable file.
        f.writelines(["Seite:%s\n" % item for item in sorted(all_sites)])
예제 #5
0
def crawler_cat_index_site():
    """Collect page titles for all watched works and write them to ``output.txt``.

    Steps:

    1. Query PetScan for the categories in ``watch_themes`` (search depth 4,
       OR logic), excluding the category ``Zeitschrift``, and keep the
       titles of the results.
    2. For every such title, query the ``Seite`` namespace for pages in any
       of the categories ``Fertig``/``Korrigiert``/``Unkorrigiert``, using
       the title as an "any outlink" filter.
    3. If step 2 finds nothing, look for a page of the ``Index`` namespace
       in category ``Index`` with the same outlink filter and repeat step 2
       with the first index page (``<nstext>:<title>``).  Titles without an
       index page are printed.

    All titles found are de-duplicated and written to ``output.txt``
    (UTF-8), one per line, sorted and prefixed with ``Seite:``.  A progress
    line is printed after every work.
    """

    def find_sites(outlink):
        """Return the set of page titles PetScan reports for ``outlink``."""
        # NOTE: this link has a bug on catscan2
        # http://tools.wmflabs.org/catscan2/catscan2.php?project=wikisource&categories=Fertig%0D%0AKorrigiert%0D%0AUnkorrigiert&comb[union]=1&ns[102]=1&outlinks_any=Einige+Bemerkungen+%C3%BCber+die+von+Dr.+Liskovius+ver%C3%B6ffentlichten+Resultate+seiner+%E2%80%9EUntersuchungen+%C3%BCber+den+Einflu%C3%9F+der+verschiedenen+Weite+der+Labialpfeifen+auf+ihre+Tonh%C3%B6he%E2%80%9C&interface_language=de
        searcher_sites = PetScan()
        searcher_sites.add_namespace('Seite')
        for status_category in ('Fertig', 'Korrigiert', 'Unkorrigiert'):
            searcher_sites.add_positive_category(status_category)
        searcher_sites.set_logic(log_or=True)
        searcher_sites.add_any_outlink(outlink)
        return {site['a']['title'] for site in searcher_sites.run()}

    searcher_werke = PetScan()
    for item in watch_themes:
        searcher_werke.add_positive_category(item)
    searcher_werke.add_negative_category('Zeitschrift')
    searcher_werke.set_search_depth(4)
    searcher_werke.set_logic(log_or=True)
    list_werke = [row['a']['title'] for row in searcher_werke.run()]

    all_sites = set()
    for counter, werk in enumerate(list_werke, start=1):
        sites = find_sites(werk)
        if not sites:
            # Nothing links the work directly; retry through its index page.
            searcher_index = PetScan()
            searcher_index.add_any_outlink(werk)
            searcher_index.add_namespace('Index')
            searcher_index.add_positive_category('Index')
            index = searcher_index.run()
            if index:
                sites = find_sites(index[0]['a']['nstext'] + ':' + index[0]['a']['title'])
            else:
                print(werk)
        # Merge the fallback result as well; otherwise works reachable only
        # through their index page would contribute nothing to the output.
        all_sites |= sites
        print(counter, '/', len(list_werke), ' result:', len(all_sites))

    with open('output.txt', 'w', encoding='utf-8') as f:
        # Sorted so that repeated runs produce a stable file.
        f.writelines(["Seite:%s\n" % item for item in sorted(all_sites)])
예제 #6
0
 def petscan(self, categories, not_categories=None, article=False, year=None):
     """Count the pages PetScan finds for a "Die Gartenlaube" query.

     :param categories: iterable of category names added as positive
         categories.
     :param not_categories: optional iterable of category names added as
         negative categories; skipped when empty or ``None``.
     :param article: selects the namespace ``"Article"`` when true,
         ``"Seite"`` otherwise.
     :param year: when given, the base category is
         ``"Die Gartenlaube (<year>)"`` instead of ``"Die Gartenlaube"``.
     :return: number of entries in the list returned by ``PetScan.run()``.
     """
     query = PetScan()
     query.add_namespace("Article" if article else "Seite")
     query.set_search_depth(5)
     base_category = "Die Gartenlaube"
     if year:
         base_category = "Die Gartenlaube (" + str(year) + ")"
     query.add_positive_category(base_category)
     for wanted in categories:
         query.add_positive_category(wanted)
     # ``None`` and empty containers both mean "no exclusions".
     for unwanted in not_categories or ():
         query.add_negative_category(unwanted)
     self.logger.debug(str(query))
     hits = query.run()
     return len(hits)
예제 #7
0
class TestCatScan(TestCase):
    """Unit tests for the ``PetScan`` query builder.

    Covers the option setters, category/template/link list handling, the
    string (URL) form of a query and ``run()`` against a mocked endpoint.
    """

    def setUp(self):
        """Give every test a fresh ``PetScan`` instance."""
        self.petscan = PetScan()

    def test_add_options(self):
        """Successive ``add_options`` calls accumulate in ``options``."""
        self.petscan.add_options({"max_age": "45"})
        self.petscan.add_options({"smaller": "300"})
        self.assertDictEqual({"smaller": "300", "max_age": "45"}, self.petscan.options)

    def test_add_categoy(self):
        """A search depth passed with a category is stored as ``name|depth``."""
        self.petscan.add_positive_category("pos1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_positive_category("pos3", 2)
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg2")
        self.petscan.add_negative_category("neg3", 3)
        self.assertEqual(["pos1", "pos2", "pos3|2"], self.petscan.categories["positive"])
        self.assertEqual(["neg1", "neg2", "neg3|3"], self.petscan.categories["negative"])

    def test_add_namespace(self):
        """Numbers, names and mixed lists all end up as ``ns[<number>]`` options."""
        self.petscan.add_namespace(0)
        self.petscan.add_namespace("Datei")
        self.petscan.add_namespace([2, "Vorlage"])
        self.assertDictEqual({"ns[0]": "1", "ns[2]": "1", "ns[6]": "1", "ns[10]": "1"}, self.petscan.options)

    def test_activate_redirects(self):
        """``activate_redirects`` sets ``show_redirects`` to ``yes``."""
        self.petscan.activate_redirects()
        self.assertDictEqual({"show_redirects": "yes"}, self.petscan.options)

    def test_deactivate_redirects(self):
        """``deactivate_redirects`` sets ``show_redirects`` to ``no``."""
        self.petscan.deactivate_redirects()
        self.assertDictEqual({"show_redirects": "no"}, self.petscan.options)

    def test_last_change_before(self):
        """The datetime is stored under ``before`` as a 14-digit timestamp."""
        self.petscan.last_change_before(datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"before": "12340101020242"}, self.petscan.options)

    def test_last_change_after(self):
        """The datetime is stored under ``after`` as a 14-digit timestamp."""
        self.petscan.last_change_after(datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"after": "12340101020242"}, self.petscan.options)

    def test_max_age(self):
        """``max_age`` stores the value as a string."""
        self.petscan.max_age(1234)
        self.assertDictEqual({"max_age": "1234"}, self.petscan.options)

    def test_only_new(self):
        """``only_new`` sets the ``only_new`` option to ``1``."""
        self.petscan.only_new()
        self.assertDictEqual({"only_new": "1"}, self.petscan.options)

    def test_smaller_then(self):
        """``smaller_then`` stores the value under ``smaller``."""
        self.petscan.smaller_then(42)
        self.assertDictEqual({"smaller": "42"}, self.petscan.options)

    def test_larger_then(self):
        """``larger_then`` stores the value under ``larger``."""
        self.petscan.larger_then(42)
        self.assertDictEqual({"larger": "42"}, self.petscan.options)

    def test_get_wikidata(self):
        """``get_wikidata_items`` sets ``wikidata_item`` to ``any``."""
        self.petscan.get_wikidata_items()
        self.assertDictEqual({"wikidata_item": "any"}, self.petscan.options)

    def test_get_Pages_with_wikidata(self):
        """``get_pages_with_wd_items`` sets ``wikidata_item`` to ``with``."""
        self.petscan.get_pages_with_wd_items()
        self.assertDictEqual({"wikidata_item": "with"}, self.petscan.options)

    def test_get_Pages_without_wikidata(self):
        """``get_pages_without_wd_items`` sets ``wikidata_item`` to ``without``."""
        self.petscan.get_pages_without_wd_items()
        self.assertDictEqual({"wikidata_item": "without"}, self.petscan.options)

    def test_set_or(self):
        """``set_logic_union`` sets ``combination`` to ``union``."""
        self.petscan.set_logic_union()
        self.assertDictEqual({"combination": "union"}, self.petscan.options)

    def test_set_regex(self):
        """``set_regex_filter`` stores the pattern under ``regexp_filter``."""
        self.petscan.set_regex_filter("abc")
        self.assertDictEqual({"regexp_filter": "abc"}, self.petscan.options)

    def test_set_last_edits(self):
        """The last-edit setters store ``yes``/``no``; no argument yields ``yes``."""
        self.petscan.set_last_edit_bots(True)
        self.petscan.set_last_edit_anons(False)
        self.petscan.set_last_edit_flagged()
        self.assertDictEqual({"edits[bots]": "yes", "edits[anons]": "no", "edits[flagged]": "yes"}, self.petscan.options)

    def test_construct_cat_string(self):
        """List arguments are joined with CRLF and spaces become ``+``."""
        self.petscan.add_positive_category("pos 1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg 2")
        self.petscan.add_negative_category("neg3")
        self.assertEqual("pos+1\r\npos2", self.petscan._construct_list_argument(self.petscan.categories["positive"]))
        self.assertEqual("neg1\r\nneg+2\r\nneg3",
                         self.petscan._construct_list_argument(self.petscan.categories["negative"]))

    def test_construct_templates(self):
        """Yes/any/no templates appear as ``templates_*`` URL parameters."""
        self.petscan.add_yes_template("yes1")
        self.petscan.add_yes_template("yes2")
        self.petscan.add_any_template("any1")
        self.petscan.add_any_template("any2")
        self.petscan.add_any_template("any3")
        self.petscan.add_no_template("no1")
        self.petscan.add_no_template("no2")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=de&project=wikisource&templates_yes=yes1%0D%0Ayes2&templates_any=any1%0D%0Aany2%0D%0Aany3&templates_no=no1%0D%0Ano2")

    def test_construct_outlinks(self):
        """Yes/any/no outlinks appear as ``outlinks_*`` URL parameters."""
        self.petscan.add_yes_outlink("yes1")
        self.petscan.add_yes_outlink("yes2")
        self.petscan.add_any_outlink("any1")
        self.petscan.add_any_outlink("any2")
        self.petscan.add_any_outlink("any3")
        self.petscan.add_no_outlink("no1")
        self.petscan.add_no_outlink("no2")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=de&project=wikisource&outlinks_yes=yes1%0D%0Ayes2&outlinks_any=any1%0D%0Aany2%0D%0Aany3&outlinks_no=no1%0D%0Ano2")

    def test_construct_links_to(self):
        """Yes/any/no "links to" entries appear as ``links_to_*`` URL parameters."""
        self.petscan.add_yes_links_to("yes1")
        self.petscan.add_yes_links_to("yes2")
        self.petscan.add_any_links_to("any1")
        self.petscan.add_any_links_to("any2")
        self.petscan.add_any_links_to("any3")
        self.petscan.add_no_links_to("no1")
        self.petscan.add_no_links_to("no2")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=de&project=wikisource&links_to_all=yes1%0D%0Ayes2&links_to_any=any1%0D%0Aany2%0D%0Aany3&links_to_no=no1%0D%0Ano2")

    def test_construct_options(self):
        """Every entry of ``options`` shows up as ``&key=value`` in the URL."""
        self.petscan.options = {"max_age": "1234",
                                 "get_q": "1",
                                 "show_redirects": "yes"}
        # Render once; assertIn prints the actual URL when a check fails.
        query_string = str(self.petscan)
        self.assertIn("&max_age=1234", query_string)
        self.assertIn("&get_q=1", query_string)
        self.assertIn("&show_redirects=yes", query_string)

    def test_construct_string(self):
        """Language, project and a single query part are rendered into the URL."""
        self.petscan.set_language("en")
        self.petscan.set_project("wikipedia")
        # only a positive category
        self.petscan.add_positive_category("test")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=en&project=wikipedia&categories=test")
        # only a negative category
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_negative_category("test")
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=en&project=wikipedia&negcats=test")
        # only an option
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_options({"max_age": "10"})
        self.assertEqual(str(self.petscan),
                         "https://petscan.wmflabs.org/?language=en&project=wikipedia&max_age=10")

    def test_do_positive(self):
        """``run`` returns the page list nested inside the mocked JSON response."""
        with requests_mock.mock() as mock:
            mock.get("https://petscan.wmflabs.org/"
                     "?language=de&project=wikisource&format=json&doit=1",
                     text='{"n": "result","a": {"querytime_sec": 1.572163,'
                          '"query": "https://petscan.wmflabs.org/?language=de'
                          '&project=wikisource&categories=Autoren&get_q=1'
                          '&show_redirects=no&ns[0]=1&max_age=48'
                          '&format=json&doit=1"},'
                          '"*": [{"n": "combination",'
                          '"a": {"type": "subset",'
                          '"*": [{"id": 3279,'
                          '"len": 10197,'
                          '"n": "page",'
                          '"namespace": 0,'
                          '"nstext": "",'
                          '"q": "Q60644",'
                          '"title": "Friedrich_Rückert",'
                          '"touched": "20161024211701"}]}}]}')
            self.assertEqual(self.petscan.run(), [{"id": 3279,
                                                   "len": 10197,
                                                   "n": "page",
                                                   "namespace": 0,
                                                   "nstext": "",
                                                   "q": "Q60644",
                                                   "title": "Friedrich_Rückert",
                                                   "touched": "20161024211701"}])

    def test_do_negative(self):
        """A 404 answer from the endpoint makes ``run`` raise ``ConnectionError``."""
        with requests_mock.mock() as mock:
            mock.get("https://petscan.wmflabs.org/"
                     "?language=de&project=wikisource&format=json&doit=1",
                     status_code=404)
            with self.assertRaises(ConnectionError):
                self.petscan.run()
예제 #8
0
class TestCatScan(TestCase):
    """Unit tests for the ``PetScan`` query builder.

    Covers the option setters, category/template/link list handling, the
    string (URL) form of a query and ``run()`` against a mocked endpoint.
    """

    def setUp(self):
        """Give every test a fresh ``PetScan`` instance."""
        self.petscan = PetScan()

    def test_add_options(self):
        """Successive ``add_options`` calls accumulate in ``options``."""
        self.petscan.add_options({"max_age": "45"})
        self.petscan.add_options({"smaller": "300"})
        self.assertDictEqual({
            "smaller": "300",
            "max_age": "45"
        }, self.petscan.options)

    def test_add_categoy(self):
        """A search depth passed with a category is stored as ``name|depth``."""
        self.petscan.add_positive_category("pos1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_positive_category("pos3", 2)
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg2")
        self.petscan.add_negative_category("neg3", 3)
        self.assertEqual(["pos1", "pos2", "pos3|2"],
                         self.petscan.categories["positive"])
        self.assertEqual(["neg1", "neg2", "neg3|3"],
                         self.petscan.categories["negative"])

    def test_add_namespace(self):
        """Single numbers and lists of numbers end up as ``ns[<number>]`` options."""
        self.petscan.add_namespace(0)
        self.petscan.add_namespace([2, 10])
        self.assertDictEqual({
            "ns[0]": "1",
            "ns[2]": "1",
            "ns[10]": "1"
        }, self.petscan.options)

    def test_activate_redirects(self):
        """``activate_redirects`` sets ``show_redirects`` to ``yes``."""
        self.petscan.activate_redirects()
        self.assertDictEqual({"show_redirects": "yes"}, self.petscan.options)

    def test_deactivate_redirects(self):
        """``deactivate_redirects`` sets ``show_redirects`` to ``no``."""
        self.petscan.deactivate_redirects()
        self.assertDictEqual({"show_redirects": "no"}, self.petscan.options)

    def test_last_change_before(self):
        """The datetime is stored under ``before`` as a 14-digit timestamp."""
        self.petscan.last_change_before(
            datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"before": "12340101020242"},
                             self.petscan.options)

    def test_last_change_after(self):
        """The datetime is stored under ``after`` as a 14-digit timestamp."""
        self.petscan.last_change_after(
            datetime(year=1234, month=1, day=1, hour=2, minute=2, second=42))
        self.assertDictEqual({"after": "12340101020242"}, self.petscan.options)

    def test_max_age(self):
        """``max_age`` stores the value as a string."""
        self.petscan.max_age(1234)
        self.assertDictEqual({"max_age": "1234"}, self.petscan.options)

    def test_only_new(self):
        """``only_new`` sets the ``only_new`` option to ``1``."""
        self.petscan.only_new()
        self.assertDictEqual({"only_new": "1"}, self.petscan.options)

    def test_smaller_then(self):
        """``smaller_then`` stores the value under ``smaller``."""
        self.petscan.smaller_then(42)
        self.assertDictEqual({"smaller": "42"}, self.petscan.options)

    def test_larger_then(self):
        """``larger_then`` stores the value under ``larger``."""
        self.petscan.larger_then(42)
        self.assertDictEqual({"larger": "42"}, self.petscan.options)

    def test_get_wikidata(self):
        """``get_wikidata_items`` sets ``wikidata_item`` to ``any``."""
        self.petscan.get_wikidata_items()
        self.assertDictEqual({"wikidata_item": "any"}, self.petscan.options)

    def test_get_Pages_with_wikidata(self):
        """``get_pages_with_wd_items`` sets ``wikidata_item`` to ``with``."""
        self.petscan.get_pages_with_wd_items()
        self.assertDictEqual({"wikidata_item": "with"}, self.petscan.options)

    def test_get_Pages_without_wikidata(self):
        """``get_pages_without_wd_items`` sets ``wikidata_item`` to ``without``."""
        self.petscan.get_pages_without_wd_items()
        self.assertDictEqual({"wikidata_item": "without"},
                             self.petscan.options)

    def test_set_or(self):
        """``set_logic_union`` sets ``combination`` to ``union``."""
        self.petscan.set_logic_union()
        self.assertDictEqual({"combination": "union"}, self.petscan.options)

    def test_set_regex(self):
        """``set_regex_filter`` stores the pattern under ``regexp_filter``."""
        self.petscan.set_regex_filter("abc")
        self.assertDictEqual({"regexp_filter": "abc"}, self.petscan.options)

    def test_set_last_edits(self):
        """The last-edit setters store ``yes``/``no``; no argument yields ``yes``."""
        self.petscan.set_last_edit_bots(True)
        self.petscan.set_last_edit_anons(False)
        self.petscan.set_last_edit_flagged()
        self.assertDictEqual(
            {
                "edits[bots]": "yes",
                "edits[anons]": "no",
                "edits[flagged]": "yes"
            }, self.petscan.options)

    def test_construct_cat_string(self):
        """List arguments are joined with CRLF and spaces become ``+``."""
        self.petscan.add_positive_category("pos 1")
        self.petscan.add_positive_category("pos2")
        self.petscan.add_negative_category("neg1")
        self.petscan.add_negative_category("neg 2")
        self.petscan.add_negative_category("neg3")
        self.assertEqual(
            "pos+1\r\npos2",
            self.petscan._construct_list_argument(
                self.petscan.categories["positive"]))
        self.assertEqual(
            "neg1\r\nneg+2\r\nneg3",
            self.petscan._construct_list_argument(
                self.petscan.categories["negative"]))

    def test_construct_templates(self):
        """Yes/any/no templates appear as ``templates_*`` URL parameters."""
        self.petscan.add_yes_template("yes1")
        self.petscan.add_yes_template("yes2")
        self.petscan.add_any_template("any1")
        self.petscan.add_any_template("any2")
        self.petscan.add_any_template("any3")
        self.petscan.add_no_template("no1")
        self.petscan.add_no_template("no2")
        self.assertEqual(
            str(self.petscan), "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&templates_yes=yes1%0D%0Ayes2"
            "&templates_any=any1%0D%0Aany2%0D%0Aany3"
            "&templates_no=no1%0D%0Ano2")

    def test_construct_outlinks(self):
        """Yes/any/no outlinks appear as ``outlinks_*`` URL parameters."""
        self.petscan.add_yes_outlink("yes1")
        self.petscan.add_yes_outlink("yes2")
        self.petscan.add_any_outlink("any1")
        self.petscan.add_any_outlink("any2")
        self.petscan.add_any_outlink("any3")
        self.petscan.add_no_outlink("no1")
        self.petscan.add_no_outlink("no2")
        self.assertEqual(
            str(self.petscan), "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&outlinks_yes=yes1%0D%0Ayes2"
            "&outlinks_any=any1%0D%0Aany2%0D%0Aany3"
            "&outlinks_no=no1%0D%0Ano2")

    def test_construct_links_to(self):
        """Yes/any/no "links to" entries appear as ``links_to_*`` URL parameters."""
        self.petscan.add_yes_links_to("yes1")
        self.petscan.add_yes_links_to("yes2")
        self.petscan.add_any_links_to("any1")
        self.petscan.add_any_links_to("any2")
        self.petscan.add_any_links_to("any3")
        self.petscan.add_no_links_to("no1")
        self.petscan.add_no_links_to("no2")
        self.assertEqual(
            str(self.petscan), "https://petscan.wmflabs.org/?language=de"
            "&project=wikisource"
            "&links_to_all=yes1%0D%0Ayes2"
            "&links_to_any=any1%0D%0Aany2%0D%0Aany3"
            "&links_to_no=no1%0D%0Ano2")

    def test_construct_options(self):
        """Every entry of ``options`` shows up as ``&key=value`` in the URL."""
        self.petscan.options = {
            "max_age": "1234",
            "get_q": "1",
            "show_redirects": "yes"
        }
        # Render once; assertIn prints the actual URL when a check fails.
        query_string = str(self.petscan)
        self.assertIn("&max_age=1234", query_string)
        self.assertIn("&get_q=1", query_string)
        self.assertIn("&show_redirects=yes", query_string)

    def test_construct_string(self):
        """Language, project and a single query part are rendered into the URL."""
        self.petscan.set_language("en")
        self.petscan.set_project("wikipedia")
        # only a positive category
        self.petscan.add_positive_category("test")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=en&project=wikipedia&categories=test"
        )
        # only a negative category
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_negative_category("test")
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=en&project=wikipedia&negcats=test"
        )
        # only an option
        self.petscan.categories = {"positive": [], "negative": []}
        self.petscan.add_options({"max_age": "10"})
        self.assertEqual(
            str(self.petscan),
            "https://petscan.wmflabs.org/?language=en&project=wikipedia&max_age=10"
        )

    def test_do_positive(self):
        """``run`` returns the page list nested inside the mocked JSON response."""
        with requests_mock.mock() as mock:
            mock.get(
                "https://petscan.wmflabs.org/"
                "?language=de&project=wikisource&format=json&doit=1",
                text='{"n": "result","a": {"querytime_sec": 1.572163,'
                '"query": "https://petscan.wmflabs.org/?language=de'
                '&project=wikisource&categories=Autoren&get_q=1'
                '&show_redirects=no&ns[0]=1&max_age=48'
                '&format=json&doit=1"},'
                '"*": [{"n": "combination",'
                '"a": {"type": "subset",'
                '"*": [{"id": 3279,'
                '"len": 10197,'
                '"n": "page",'
                '"namespace": 0,'
                '"nstext": "",'
                '"q": "Q60644",'
                '"title": "Friedrich_Rückert",'
                '"touched": "20161024211701"}]}}]}')
            self.assertEqual(self.petscan.run(), [{
                "id": 3279,
                "len": 10197,
                "n": "page",
                "namespace": 0,
                "nstext": "",
                "q": "Q60644",
                "title": "Friedrich_Rückert",
                "touched": "20161024211701"
            }])

    def test_do_negative(self):
        """A 404 answer from the endpoint makes ``run`` raise ``PetScanException``."""
        with requests_mock.mock() as mock:
            mock.get(
                "https://petscan.wmflabs.org/"
                "?language=de&project=wikisource&format=json&doit=1",
                status_code=404)
            with self.assertRaises(PetScanException):
                self.petscan.run()