def main(*args):
    """Export monument list article titles, grouped or flat.

    Command line options (beyond those consumed by pywikibot.handle_args):
        -out:categories   write one file per category instead of one stream
        -out:file         write to Denkmallistenliste.txt instead of stdout
        -fmt:wiki         format each title as a wiki link ([[Title]])
        -fmt:url          format each title as a full de.wikipedia.org URL
        -list-cat-only    export only the category titles themselves
    """
    utf8_writer = codecs.getwriter('utf8')
    single_categories = False
    # Default destination: UTF-8-wrapped stdout, one formatted title per line.
    output_destination = utf8_writer(sys.stdout)
    formatstring = "{}\n"
    categories_only = False
    opened_file = None  # file we opened ourselves; must be closed at the end
    for arg in pywikibot.handle_args(list(args)):
        if arg == "-out:categories":
            single_categories = True
        elif arg == "-out:file":
            # Keep a reference so the handle can be closed below — the
            # original version leaked this file object.
            opened_file = codecs.open("Denkmallistenliste.txt", "w", 'utf-8')
            output_destination = opened_file
        elif arg == "-fmt:wiki":
            formatstring = "[[{}]]\n"
        elif arg == "-fmt:url":
            formatstring = "https://de.wikipedia.org/wiki/{}\n"
        elif arg == "-list-cat-only":
            categories_only = True
    site = pywikibot.Site()
    fetcher = CategoryFetcher(site)
    try:
        if categories_only and not single_categories:
            # Only the category titles, no articles.
            export_to_file(output_destination, fetcher.get_categories(False), formatstring)
            return
        if single_categories:
            # One output file per category, named after the category title.
            for category in fetcher.get_categories():
                with codecs.open(category.title() + u".txt", "w", 'utf-8') as outfile:
                    export_to_file(outfile, category.articles(), formatstring)
        else:
            # Collect all articles across every category into one flat list.
            collection = ArticleCollection()
            callbacks = ArticleIteratorCallbacks(article_callback=collection.cb_add_article)
            article_iterator = ArticleIterator(callbacks, fetcher.get_categories())
            article_iterator.iterate_categories()
            export_to_file(output_destination, collection.article_list, formatstring)
    finally:
        # Close the output file if -out:file opened one (never close stdout).
        if opened_file is not None:
            opened_file.close()
def main(*args):
    """Run the template checker over all monument categories.

    Recognised options (besides those handled by ArticleIteratorArgumentParser
    and pywikibot.handle_args):
        -outputpage:<title>        wiki page prefix for saving the results
        -exclude-articles:<title>  wiki page listing article titles to skip
    """
    site = pywikibot.Site()
    fetcher = CategoryFetcher(site)
    checker = TemplateChecker()
    checker.load_config("config/templates.json")
    checker_bot = CheckerBot(checker, site)
    all_categories = fetcher.get_categories()
    callbacks = ArticleIteratorCallbacks(
        category_callback=checker_bot.cb_store_category_result,
        article_callback=checker_bot.cb_check_article,
        logging_callback=pywikibot.log,
    )
    article_iterator = ArticleIterator(callbacks, categories=all_categories)
    parser = ArticleIteratorArgumentParser(article_iterator, fetcher)
    for argument in pywikibot.handle_args(list(args)):
        if parser.check_argument(argument):
            continue
        elif argument.startswith("-outputpage:"):
            # Slice by prefix length instead of a magic offset (was [12:]).
            checker_bot.outputpage = argument[len("-outputpage:"):]
        elif argument.startswith("-exclude-articles:"):
            # Slice by prefix length instead of a magic offset (was [18:]).
            page = pywikibot.Page(site, argument[len("-exclude-articles:"):])
            article_iterator.excluded_articles = load_excluded_articles_from_wiki(page)
    article_iterator.iterate_categories()
    if article_iterator.categories != all_categories:
        # Don't update summary page if only single categories were crawled
        return
    summary = checker_bot.generate_summary_page()
    if checker_bot.outputpage:
        checker_bot.save_wikipage(summary, checker_bot.outputpage + u"/Zusammenfassung")
    else:
        # No output page configured: dump the summary to the console instead.
        pywikibot.output(u"Zusammenfassung")
        pywikibot.output(u"===============")
        pywikibot.output(summary)
        pywikibot.output(checker_bot.generate_config_table())
def main(sample_size=100):
    """Print a random article sample for every county category to stdout.

    Args:
        sample_size: number of articles sampled per county. Defaults to 100,
            matching the previously hard-coded value, so ``main()`` behaves
            exactly as before.
    """
    utf8_writer = codecs.getwriter('utf8')
    # Wrap stdout so the sampled titles are written as UTF-8.
    output_destination = utf8_writer(sys.stdout)
    site = pywikibot.Site()
    fetcher = CategoryFetcher(site)
    for county in fetcher.get_categories():
        sample_county(county, sample_size, output_destination)