def main(*args):
    """Export categories or the articles they contain, one formatted line each.

    Arguments left over by ``pywikibot.handle_args`` are interpreted as:

    ``-out:categories``  write one ``<category title>.txt`` file per category,
                         each filled from ``category.articles()``
    ``-out:file``        write to ``Denkmallistenliste.txt`` instead of stdout
    ``-fmt:wiki``        use ``"[[{}]]\\n"`` as the line format string
    ``-fmt:url``         use ``"https://de.wikipedia.org/wiki/{}\\n"`` as the
                         line format string
    ``-list-cat-only``   export the result of ``fetcher.get_categories(False)``
                         instead of articles (has no effect when combined with
                         ``-out:categories``)

    Unrecognised arguments are ignored.
    """
    utf8_writer = codecs.getwriter('utf8')
    single_categories = False
    categories_only = False
    formatstring = "{}\n"
    output_destination = utf8_writer(sys.stdout)
    # Handle of the file opened for "-out:file". It is tracked separately so
    # that only a file opened here gets closed; the stdout writer is left alone.
    opened_file = None
    try:
        for arg in pywikibot.handle_args(list(args)):
            if arg == "-out:categories":
                single_categories = True
            elif arg == "-out:file":
                # Open only once, even if the flag is repeated, so no earlier
                # handle is leaked.
                if opened_file is None:
                    opened_file = codecs.open("Denkmallistenliste.txt", "w", 'utf-8')
                output_destination = opened_file
            elif arg == "-fmt:wiki":
                formatstring = "[[{}]]\n"
            elif arg == "-fmt:url":
                formatstring = "https://de.wikipedia.org/wiki/{}\n"
            elif arg == "-list-cat-only":
                categories_only = True

        site = pywikibot.Site()
        fetcher = CategoryFetcher(site)

        if single_categories:
            # One output file per category; takes precedence over -list-cat-only.
            for category in fetcher.get_categories():
                with codecs.open(category.title() + u".txt", "w", 'utf-8') as outfile:
                    export_to_file(outfile, category.articles(), formatstring)
        elif categories_only:
            export_to_file(output_destination, fetcher.get_categories(False), formatstring)
        else:
            # Collect the articles through the iterator callback, then write
            # them all to the single destination.
            collection = ArticleCollection()
            callbacks = ArticleIteratorCallbacks(article_callback=collection.cb_add_article)
            article_iterator = ArticleIterator(callbacks, fetcher.get_categories())
            article_iterator.iterate_categories()
            export_to_file(output_destination, collection.article_list, formatstring)
    finally:
        # Flush and release the output file even if fetching or exporting fails.
        if opened_file is not None:
            opened_file.close()
def main(*args):
    """Run the template checker over the fetched categories and emit a summary.

    Every argument left over by ``pywikibot.handle_args`` is first offered to
    the ``ArticleIteratorArgumentParser``. Arguments it does not accept are
    then checked for:

    ``-outputpage:<name>``        stored in ``checker_bot.outputpage``; when
                                  set, the summary is saved to
                                  ``<name>/Zusammenfassung``, otherwise it is
                                  printed with ``pywikibot.output``
    ``-exclude-articles:<page>``  wiki page passed to
                                  ``load_excluded_articles_from_wiki``; the
                                  result becomes the iterator's
                                  ``excluded_articles``
    """
    site = pywikibot.Site()
    fetcher = CategoryFetcher(site)
    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    checker_bot = CheckerBot(checker, site)
    # Kept so we can tell later whether argument parsing replaced the
    # iterator's category list.
    all_categories = fetcher.get_categories()
    callbacks = ArticleIteratorCallbacks(
        category_callback=checker_bot.cb_store_category_result,
        article_callback=checker_bot.cb_check_article,
        logging_callback=pywikibot.log,
    )
    article_iterator = ArticleIterator(callbacks, categories=all_categories)
    parser = ArticleIteratorArgumentParser(article_iterator, fetcher)
    for argument in pywikibot.handle_args(list(args)):
        # The parser gets the first look at every argument; a truthy result
        # means there is nothing more to do for it here.
        if parser.check_argument(argument):
            continue
        elif argument.find("-outputpage:") == 0:
            # 12 == len("-outputpage:")
            checker_bot.outputpage = argument[12:]
        elif argument.find("-exclude-articles:") == 0:
            # 18 == len("-exclude-articles:")
            page = pywikibot.Page(site, argument[18:])
            article_iterator.excluded_articles = load_excluded_articles_from_wiki(page)
    article_iterator.iterate_categories()
    if article_iterator.categories != all_categories:
        # Don't update summary page if only single categories were crawled
        return
    summary = checker_bot.generate_summary_page()
    if checker_bot.outputpage:
        checker_bot.save_wikipage(summary, checker_bot.outputpage + u"/Zusammenfassung")
    else:
        pywikibot.output(u"Zusammenfassung")
        pywikibot.output(u"===============")
        pywikibot.output(summary)
    # NOTE(review): the original layout of this file was lost, so it is unclear
    # whether this call belongs at function level (as here) or inside the
    # ``else`` branch above -- confirm against version control.
    pywikibot.output(checker_bot.generate_config_table())
def main(*args):
    """Check the media of one category with a ``CommonsBot``.

    Arguments left over by ``pywikibot.handle_args`` are interpreted as:

    ``-category:<name>``     category to process (default: the hard-coded
                             Wiki Loves Monuments 2015 category)
    ``-start-at:<date>``     start timestamp; ``T0:00:00Z`` is appended before
                             parsing (default: ``first_day_of_month()``)
    ``-sleep-seconds:<n>``   sets ``commons_bot.sleep_seconds`` when ``n > 0``
    ``-once``                call ``commons_bot.run_once`` instead of
                             ``commons_bot.run_continuously``
    ``-local-media``         look the category up on the configured site
                             instead of Commons

    Every argument other than ``-category:`` is offered to the
    ``ArticleIteratorArgumentParser`` before the remaining flags are checked.
    """
    category_flag = "-category:"
    start_flag = "-start-at:"
    sleep_flag = "-sleep-seconds:"

    wikipedia_site = pywikibot.Site()  # Use the site configured in params/user-config
    media_site = pywikibot.Site("commons", "commons")

    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    commons_bot = CommonsBot(wikipedia_site, checker)

    article_iterator = ArticleIterator(
        ArticleIteratorCallbacks(
            logging_callback=pywikibot.log,
            article_callback=commons_bot.cb_check_article,
        )
    )
    article_iterator.log_every_n = 1
    parser = ArticleIteratorArgumentParser(article_iterator, None)

    # Defaults, overridable from the command line below.
    run_cmd = commons_bot.run_continuously
    category_name = u"Images from Wiki Loves Monuments 2015 in Germany"
    start_time = first_day_of_month()

    for argument in pywikibot.handle_args(args):
        if argument.startswith(category_flag):
            category_name = argument[len(category_flag):]
        elif parser.check_argument(argument):
            # Truthy result: nothing more to do for this argument.
            pass
        elif argument.startswith(start_flag):
            iso_text = argument[len(start_flag):] + "T0:00:00Z"
            start_time = pywikibot.Timestamp.fromISOformat(iso_text)
        elif argument.startswith(sleep_flag):
            seconds = int(argument[len(sleep_flag):])
            # Non-positive values leave the bot's existing setting untouched.
            if seconds > 0:
                commons_bot.sleep_seconds = seconds
        elif argument == "-once":
            run_cmd = commons_bot.run_once
        elif argument == "-local-media":
            media_site = wikipedia_site

    run_cmd(article_iterator, start_time, pywikibot.Category(media_site, category_name))
def test_article_iterator_iterates_over_categories(self):
    """A category without articles is reported once to ``callbacks.category`` with counter 0."""
    # Arrange: a single category whose article listing is empty.
    empty_category = Mock()
    empty_category.articles.return_value = []
    callback_bundle = Mock()
    iterator = ArticleIterator(callback_bundle)
    iterator.categories = [empty_category]

    # Act
    iterator.iterate_categories()

    # Assert
    expected_kwargs = dict(category=empty_category, counter=0, article_iterator=iterator)
    callback_bundle.category.assert_called_once_with(**expected_kwargs)
def test_article_iterator_returns_correct_counter(self):
    """With ten articles in one category, the category callback is called once with counter=10."""
    # The same mock article repeated ten times, exactly as a list multiplication yields.
    shared_article = Mock()
    category = Mock()
    category.articles.return_value = [shared_article] * 10

    on_category = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(category_callback=on_category))
    iterator.categories = [category]

    iterator.iterate_categories()

    on_category.assert_called_once_with(
        category=category,
        counter=10,
        article_iterator=iterator,
    )
def test_article_iterator_with_multiple_categories_stops_at_limit(self):
    """With limit=10 and the same ten-article category listed twice, the category callback fires once."""
    shared_article = Mock()
    category = Mock()
    category.articles.return_value = [shared_article] * 10

    on_category = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(category_callback=on_category))
    iterator.limit = 10
    # The same category object appears twice in the work list.
    iterator.categories = [category] * 2

    iterator.iterate_categories()

    on_category.assert_called_once_with(
        category=category,
        counter=10,
        article_iterator=iterator,
    )
def test_article_iterator_iterates_over_articles(self):
    """``callbacks.article`` is called for each article with its zero-based position as counter."""
    pages = [Mock(), Mock()]
    category = Mock()
    category.articles.return_value = pages

    callback_bundle = Mock()
    iterator = ArticleIterator(callback_bundle)
    iterator.categories = [category]

    iterator.iterate_categories()

    # First article is expected with counter 0, second with counter 1.
    for position, page in enumerate(pages):
        callback_bundle.article.assert_any_call(
            article=page,
            category=category,
            counter=position,
            article_iterator=iterator,
        )
def test_excluded_articles_are_skipped(self):
    """An article titled like an entry in ``excluded_articles`` never reaches the article callback."""
    skipped_page = Mock()
    skipped_page.title.return_value = "Foo"
    kept_page = Mock()
    kept_page.title.return_value = "Bar"

    category = Mock()
    category.articles.return_value = [skipped_page, kept_page]

    on_article = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(article_callback=on_article))
    iterator.categories = [category]
    iterator.excluded_articles = ["Foo"]

    iterator.iterate_categories()

    # Only "Bar" is reported, and it is reported with counter 0.
    on_article.assert_called_once_with(
        article=kept_page,
        category=category,
        counter=0,
        article_iterator=iterator,
    )
def test_article_iterator_logs_every_n_articles(self):
    """With ``log_every_n = 1`` a "Fetching page" message is logged for both articles."""
    first_page = Mock()
    first_page.title.return_value = "Foo"
    second_page = Mock()
    second_page.title.return_value = "Bar"

    category = Mock()
    category.articles.return_value = [first_page, second_page]

    logger = Mock()
    iterator = ArticleIterator(ArticleIteratorCallbacks(logging_callback=logger))
    iterator.log_every_n = 1
    iterator.categories = [category]

    iterator.iterate_categories()

    expected_messages = (
        u"Fetching page 0 (Foo)",
        u"Fetching page 1 (Bar)",
    )
    for message in expected_messages:
        logger.assert_any_call(message)
def test_category_limit_is_respected_together_with_limit(self):
    """limit=5 with articles_per_category_limit=3 gives five article calls and two category calls."""
    shared_article = Mock()
    category = Mock()
    category.articles.return_value = [shared_article] * 10

    on_category = Mock()
    on_article = Mock()
    iterator = ArticleIterator(
        ArticleIteratorCallbacks(
            category_callback=on_category,
            article_callback=on_article,
        )
    )
    iterator.limit = 5
    iterator.articles_per_category_limit = 3
    # Four entries, all referring to the same ten-article category.
    iterator.categories = [category] * 4

    iterator.iterate_categories()

    self.assertEqual(on_article.call_count, 5)
    self.assertEqual(on_category.call_count, 2)