Ejemplo n.º 1
0
def main(*args):
    """Configure the Commons bot from command-line arguments and run it.

    A ``CommonsBot`` is created for the default site and connected to an
    ``ArticleIterator``; the selected run command is then invoked with the
    iterator, a start timestamp and a category object.

    Recognized arguments, checked in this order:

    * ``-category:NAME`` -- category to process instead of the default one.
    * anything accepted by ``ArticleIteratorArgumentParser.check_argument``.
    * ``-start-at:DATE`` -- start timestamp; midnight UTC is appended to
      DATE (presumably ``YYYY-MM-DD`` -- confirm against callers).
    * ``-sleep-seconds:N`` -- sets ``commons_bot.sleep_seconds``; values
      that are not greater than zero are ignored, non-integer values raise
      ``ValueError``.
    * ``-once`` -- use ``run_once`` instead of ``run_continuously``.
    * ``-local-media`` -- look up the category on the default site instead
      of Commons.

    :param args: command-line arguments, passed to ``pywikibot.handle_args``.
    """
    wikipedia_site = pywikibot.Site()  # Use the site configured in params/user-config
    commons_site = pywikibot.Site("commons", "commons")
    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    commons_bot = CommonsBot(wikipedia_site, checker)
    callbacks = ArticleIteratorCallbacks(
        logging_callback=pywikibot.log,
        article_callback=commons_bot.cb_check_article
    )
    article_iterator = ArticleIterator(callbacks)
    article_iterator.log_every_n = 1
    parser = ArticleIteratorArgumentParser(article_iterator, None)

    # Defaults, each of which may be overridden by an argument below.
    run_cmd = commons_bot.run_continuously
    category_name = u"Images from Wiki Loves Monuments 2015 in Germany"
    start_time = first_day_of_month()

    category_prefix = "-category:"
    start_prefix = "-start-at:"
    sleep_prefix = "-sleep-seconds:"
    for argument in pywikibot.handle_args(args):
        if argument.startswith(category_prefix):
            category_name = argument[len(category_prefix):]
        elif parser.check_argument(argument):
            continue
        elif argument.startswith(start_prefix):
            # ISO 8601 requires a two-digit hour; "T0:00:00Z" is only
            # accepted by lenient parsers.
            start_time_iso = argument[len(start_prefix):] + "T00:00:00Z"
            start_time = pywikibot.Timestamp.fromISOformat(start_time_iso)
        elif argument.startswith(sleep_prefix):
            sleep_seconds = int(argument[len(sleep_prefix):])
            if sleep_seconds > 0:
                commons_bot.sleep_seconds = sleep_seconds
        elif argument == "-once":
            run_cmd = commons_bot.run_once
        elif argument == "-local-media":
            commons_site = wikipedia_site
    category = pywikibot.Category(commons_site, category_name)
    run_cmd(article_iterator, start_time, category)
Ejemplo n.º 2
0
def main(*args):
    """Check the articles of all fetched categories and publish a summary.

    Arguments not consumed by ``ArticleIteratorArgumentParser`` may be:

    * ``-outputpage:TITLE`` -- stored as ``checker_bot.outputpage``.
    * ``-exclude-articles:TITLE`` -- wiki page from which the list of
      excluded articles is loaded.

    After iterating, a summary is produced only when the iterator still
    holds the complete category list. It is saved below the output page if
    one is set, otherwise it is written via ``pywikibot.output``.

    :param args: command-line arguments, passed to ``pywikibot.handle_args``.
    """
    site = pywikibot.Site()
    fetcher = CategoryFetcher(site)
    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    checker_bot = CheckerBot(checker, site)
    all_categories = fetcher.get_categories()
    article_iterator = ArticleIterator(
        ArticleIteratorCallbacks(
            category_callback=checker_bot.cb_store_category_result,
            article_callback=checker_bot.cb_check_article,
            logging_callback=pywikibot.log,
        ),
        categories=all_categories,
    )
    parser = ArticleIteratorArgumentParser(article_iterator, fetcher)

    outputpage_prefix = "-outputpage:"
    exclude_prefix = "-exclude-articles:"
    for argument in pywikibot.handle_args(list(args)):
        if parser.check_argument(argument):
            continue
        if argument.startswith(outputpage_prefix):
            checker_bot.outputpage = argument[len(outputpage_prefix):]
        elif argument.startswith(exclude_prefix):
            exclusion_page = pywikibot.Page(site, argument[len(exclude_prefix):])
            article_iterator.excluded_articles = load_excluded_articles_from_wiki(exclusion_page)
    article_iterator.iterate_categories()

    # Don't update summary page if only single categories were crawled
    if article_iterator.categories != all_categories:
        return

    summary = checker_bot.generate_summary_page()
    if not checker_bot.outputpage:
        # No target page configured: print the summary instead of saving it.
        for text in (u"Zusammenfassung", u"===============", summary):
            pywikibot.output(text)
        pywikibot.output(checker_bot.generate_config_table())
        return
    checker_bot.save_wikipage(summary, checker_bot.outputpage + u"/Zusammenfassung")
Ejemplo n.º 3
0
def main():
    """Read wiki text and print JSON info about its monument data.

    The wiki text comes from the optional positional ``infile`` argument
    (standard input when omitted). ``--monument_id`` / ``-i`` is forwarded
    to ``get_template_info`` together with the text. The resulting object is
    serialized as JSON to standard output.
    """
    parser = argparse.ArgumentParser(description='Generate JSON info about monument data in wiki text.')
    parser.add_argument('--monument_id', '-i', help='Unique ID of the monument. Validity will be checked.',
                        default='', metavar='ID')
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    args = parser.parse_args()
    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    mapper = CommonscatMapper()
    info = get_template_info(checker, mapper, args.infile.read(), args.monument_id)
    # The codecs writer emits encoded bytes. On Python 3 sys.stdout is a text
    # stream that rejects bytes, so write to its underlying binary buffer;
    # fall back to sys.stdout itself where no buffer exists (Python 2).
    byte_stream = getattr(sys.stdout, "buffer", sys.stdout)
    utf8_writer = codecs.getwriter('utf8')
    json.dump(info, utf8_writer(byte_stream))
Ejemplo n.º 4
0
def setup_instances():
    """Create the shared service objects and attach them to ``g``.

    Sets ``g.site_commons``, ``g.site_wikipedia``, ``g.campaign_validator``,
    ``g.page_information_collector`` and ``g.campaign_cache``. The cache is
    a ``RedisCache`` when ``app.config["REDIS_CACHE_PREFIX"]`` is truthy and
    a ``SimpleCache`` otherwise.
    """
    commons = mwclient.Site("commons.wikimedia.org")
    g.site_commons = commons
    g.site_wikipedia = mwclient.Site("de.wikipedia.org")
    g.campaign_validator = CampaignValidator(commons)

    template_checker = TemplateChecker()
    template_checker.load_config("config/templates.json")
    commonscat_mapper = CommonscatMapper()
    commonscat_mapper.load_mapping("config/commonscat_mapping.json")
    g.page_information_collector = PageInformationCollector(template_checker, commonscat_mapper)

    cache_prefix = app.config["REDIS_CACHE_PREFIX"]
    if not cache_prefix:
        # No prefix configured: use the non-Redis cache.
        g.campaign_cache = SimpleCache()
        return
    g.campaign_cache = RedisCache(host=app.config["REDIS_HOST"], key_prefix=cache_prefix)
 def test_is_allowed_template_normalizes_underscores(self):
     """Underscores and spaces in template names must be interchangeable.

     First an underscored template name is looked up against a config key
     written with spaces; then a config key written with underscores is
     added and looked up with a name written with spaces.
     """
     failure_message = "Template not found"
     template = Mock()

     # Template name uses underscores, config key uses spaces.
     template.name = u"Denkmalliste_Sachsen_Tabellenzeile"
     found = self.checker.is_allowed_template(template)
     self.assertTrue(found, failure_message)

     # Check if config is normalized
     brandenburg_settings = {
         "id": "ID",
         "id_check": "\\d{4,}",
         "id_check_description": u"Nummer, mindestens vierstellig"
     }
     self.config[u"Denkmalliste_Brandenburg_Tabellenzeile"] = brandenburg_settings
     self.checker = TemplateChecker(self.config)
     template.name = u"Denkmalliste Brandenburg Tabellenzeile"
     found = self.checker.is_allowed_template(template)
     self.assertTrue(found, failure_message)
 def setUp(self):
     """Build a checker configured with the Sachsen and Bayern templates."""
     sachsen_settings = {
         "id": "ID",
         "id_check": "\\d{4,}",
         "id_check_description": u"Nummer, mindestens vierstellig"
     }
     bayern_settings = {
         "id": "Nummer",
         "id_check": "D-\\d-\\d{3}",
         "id_check_description": u"Nummer im Format D-n-nnn"
     }
     # Keep the config on the test case so tests can extend it later.
     self.config = {
         u"Denkmalliste Sachsen Tabellenzeile": sachsen_settings,
         u"Denkmalliste Bayern Tabellenzeile": bayern_settings,
     }
     self.checker = TemplateChecker(self.config)
class TestTemplateChecker(unittest.TestCase):
    """Unit tests for ``TemplateChecker``.

    Every test starts with a checker configured for two templates
    ("Denkmalliste Sachsen Tabellenzeile" and "Denkmalliste Bayern
    Tabellenzeile"); templates and articles are replaced by ``Mock`` objects.
    """

    def setUp(self):
        """Build a checker configured with the Sachsen and Bayern templates."""
        self.config = {
            u"Denkmalliste Sachsen Tabellenzeile": {
                "id": "ID",
                "id_check": "\\d{4,}",
                "id_check_description": u"Nummer, mindestens vierstellig"
            },
            u"Denkmalliste Bayern Tabellenzeile": {
                "id": "Nummer",
                "id_check": "D-\\d-\\d{3}",
                "id_check_description": u"Nummer im Format D-n-nnn"
            }
        }
        self.checker = TemplateChecker(self.config)

    def create_article_with_text(self, text):
        """Build an Article fixture.

        :param text: value returned by the fixture's ``get()``.
        :return: a ``Mock`` article that is not a redirect page.
        """
        article = Mock()
        article.get.return_value = text
        article.isRedirectPage.return_value = False
        return article

    def test_text_contains_templates_finds_template_name(self):
        """A configured template name in the text is detected."""
        text = "{{Denkmalliste Sachsen Tabellenzeile|}}"
        self.assertTrue(self.checker.text_contains_templates(text))

    def test_get_id_returns_id(self):
        """The value after ``ID=`` is returned as the template's ID."""
        template = Mock()
        template.get.return_value = u"ID=12345"
        template.name = u"Denkmalliste Sachsen Tabellenzeile"
        self.assertEqual(self.checker.get_id(template), u"12345")

    def test_get_id_returns_empty_string_if_is_empty(self):
        """An ID parameter without a value yields an empty string."""
        template = Mock()
        template.get.return_value = u"ID="
        template.name = u"Denkmalliste Sachsen Tabellenzeile"
        self.assertEqual(self.checker.get_id(template), u"")

    def test_has_valid_id_true_for_valid_ids(self):
        """An ID matching the configured ``id_check`` pattern is valid."""
        template = Mock()
        template.get.return_value = u"ID=12345"
        template.name = u"Denkmalliste Sachsen Tabellenzeile"
        self.assertTrue(self.checker.has_valid_id(template))

    def test_has_valid_id_false_for_invalid_ids(self):
        """An ID that violates the ``id_check`` pattern is invalid.

        "123" has only three digits, the Sachsen pattern requires four.
        """
        template = Mock()
        template.get.return_value = u"ID=123"
        template.name = u"Denkmalliste Sachsen Tabellenzeile"
        self.assertFalse(self.checker.has_valid_id(template))

    def test_setting_configuration_compiles_regex_patterns(self):
        """After assigning ``config``, ``id_check`` holds a compiled pattern."""
        self.checker.config = {
            "Denkmalliste Bayern Tabellenzeile": {
                "id": "ID",
                "id_check": "D-d{3}"
            }
        }
        # The compiled-pattern type has no public name on older Pythons,
        # so derive it from an actual compiled pattern.
        expected_class = type(re.compile("test"))
        self.assertIsInstance(self.checker.config["Denkmalliste Bayern Tabellenzeile"]["id_check"], expected_class)

    def test_is_allowed_template_checks_if_template_name_is_configured(self):
        """Only templates whose name appears in the config are allowed."""
        template = Mock()
        template.name = u"Denkmalliste Sachsen Tabellenzeile"
        self.assertTrue(self.checker.is_allowed_template(template))
        template.name = u"Denkmalliste Kleinkleckersdorf Tabellenzeile"
        self.assertFalse(self.checker.is_allowed_template(template))

    def test_is_allowed_template_normalizes_underscores(self):
        """Underscores and spaces in template names must be interchangeable."""
        template = Mock()
        # Template name uses underscores, config key uses spaces.
        template.name = u"Denkmalliste_Sachsen_Tabellenzeile"
        self.assertTrue(self.checker.is_allowed_template(template), "Template not found")
        # Check if config is normalized
        self.config[u"Denkmalliste_Brandenburg_Tabellenzeile"] = {
            "id": "ID",
            "id_check": "\\d{4,}",
            "id_check_description": u"Nummer, mindestens vierstellig"
        }
        self.checker = TemplateChecker(self.config)
        template.name = u"Denkmalliste Brandenburg Tabellenzeile"
        self.assertTrue(self.checker.is_allowed_template(template), "Template not found")

    def test_check_for_errors_skips_redirect_pages(self):
        """Redirect pages are not checked; the result is ``None``."""
        article = Mock()
        article.isRedirectPage.return_value = True
        self.assertIsNone(self.checker.check_article_for_errors(article))

    def test_check_for_errors_reports_pages_without_templates(self):
        """Text without any configured template is reported as an error."""
        article = self.create_article_with_text(u"Just some test text")
        errors = self.checker.check_article_for_errors(article)
        self.assertEqual({TemplateChecker.ERROR_MISSING_TEMPLATE: True}, errors)

    def test_check_for_errors_reports_invalid_ids(self):
        """The number of templates with an invalid ID is reported."""
        article = self.create_article_with_text(u"{{Denkmalliste Sachsen Tabellenzeile|ID=1}}")
        errors = self.checker.check_article_for_errors(article)
        self.assertEqual({TemplateChecker.ERROR_INVALID_IDS: 1}, errors)

    def test_check_for_errors_returns_empty_dict_for_valid_text(self):
        """An article with one valid template produces no errors."""
        article = self.create_article_with_text(u"{{Denkmalliste Sachsen Tabellenzeile|ID=1234}}")
        errors = self.checker.check_article_for_errors(article)
        self.assertEqual({}, errors)

    def test_check_for_errors_reports_duplicate_ids(self):
        """IDs used more than once are reported with their occurrence count."""
        article = self.create_article_with_text(
            u"{{Denkmalliste Sachsen Tabellenzeile|ID=1234}}{{Denkmalliste Sachsen Tabellenzeile|ID=1234}}{{Denkmalliste Sachsen Tabellenzeile|ID=1223}}")
        errors = self.checker.check_article_for_errors(article)
        self.assertEqual({TemplateChecker.ERROR_DUPLICATE_IDS: {u"1234": 2}}, errors)

    def test_check_for_errors_reports_too_many_templates(self):
        """An article with 600 templates is reported as having too many."""
        article = self.create_article_with_text(u"{{Denkmalliste Sachsen Tabellenzeile|ID=1234}}" * 600)
        errors = self.checker.check_article_for_errors(article)
        self.assertEqual({
            TemplateChecker.ERROR_DUPLICATE_IDS: {u"1234": 600},
            TemplateChecker.ERROR_TOO_MANY_TEMPLATES: 600
        }, errors)

    def test_check_for_errors_can_report_multiple_errors(self):
        """Invalid, missing and duplicate IDs are reported together."""
        article = self.create_article_with_text(
            u"{{Denkmalliste Sachsen Tabellenzeile|ID=1}}{{Denkmalliste Sachsen Tabellenzeile|ID=1}}{{Denkmalliste Sachsen Tabellenzeile|}}")
        errors = self.checker.check_article_for_errors(article)
        expected_errors = {
            TemplateChecker.ERROR_INVALID_IDS: 2,
            TemplateChecker.ERROR_MISSING_IDS: 1,
            TemplateChecker.ERROR_DUPLICATE_IDS: {u"1": 2}
        }
        self.assertEqual(expected_errors, errors)