Example #1
0
def main(*args):
    """Export categories or their articles as formatted text lines.

    The arguments are first passed through ``pywikibot.handle_args``; of the
    arguments it returns, the following are recognised (all others are
    silently ignored):

    * ``-out:categories`` -- write one ``<category title>.txt`` file per
      category, each receiving that category's ``articles()``.
    * ``-out:file`` -- send the combined output to ``Denkmallistenliste.txt``
      instead of standard output.
    * ``-fmt:wiki`` -- use the line format ``[[{}]]``.
    * ``-fmt:url`` -- use the line format
      ``https://de.wikipedia.org/wiki/{}``.
    * ``-list-cat-only`` -- export ``fetcher.get_categories(False)`` instead
      of articles. Has no effect when combined with ``-out:categories``.

    Without any of these, every article collected by ``ArticleIterator`` is
    written to standard output using the format ``{}`` plus a newline.

    Args:
        *args: Command-line style arguments forwarded to
            ``pywikibot.handle_args``.
    """
    utf8_writer = codecs.getwriter('utf8')
    single_categories = False
    output_destination = utf8_writer(sys.stdout)
    # Handle of the file opened for "-out:file", kept separately so it can be
    # closed on exit without ever closing the stdout wrapper.
    opened_file = None
    formatstring = "{}\n"
    categories_only = False

    try:
        for arg in pywikibot.handle_args(list(args)):
            if arg == "-out:categories":
                single_categories = True
            elif arg == "-out:file":
                # Open at most once: a repeated "-out:file" would otherwise
                # leak the first handle.
                if opened_file is None:
                    opened_file = codecs.open(
                        "Denkmallistenliste.txt", "w", 'utf-8')
                output_destination = opened_file
            elif arg == "-fmt:wiki":
                formatstring = "[[{}]]\n"
            elif arg == "-fmt:url":
                formatstring = "https://de.wikipedia.org/wiki/{}\n"
            elif arg == "-list-cat-only":
                categories_only = True

        site = pywikibot.Site()
        fetcher = CategoryFetcher(site)

        if categories_only and not single_categories:
            # NOTE(review): the meaning of the positional ``False`` is defined
            # by CategoryFetcher.get_categories, which is not visible here.
            export_to_file(output_destination, fetcher.get_categories(False), formatstring)
            return

        if single_categories:
            for category in fetcher.get_categories():
                with codecs.open(category.title() + u".txt", "w", 'utf-8') as outfile:
                    export_to_file(outfile, category.articles(), formatstring)
        else:
            collection = ArticleCollection()
            callbacks = ArticleIteratorCallbacks(article_callback=collection.cb_add_article)
            article_iterator = ArticleIterator(callbacks, fetcher.get_categories())
            article_iterator.iterate_categories()
            export_to_file(output_destination, collection.article_list, formatstring)
    finally:
        # Flush and release the output file on every exit path (early return
        # or exception). Closing is harmless if it was already closed.
        if opened_file is not None:
            opened_file.close()
Example #2
0
def main(*args):
    """Check articles with a ``CheckerBot`` and publish a summary.

    A ``TemplateChecker`` is configured from ``config/templates.json`` and
    wrapped in a ``CheckerBot`` whose callbacks are driven by an
    ``ArticleIterator`` over all categories returned by ``CategoryFetcher``.

    Each argument returned by ``pywikibot.handle_args`` is first offered to
    ``ArticleIteratorArgumentParser.check_argument``; arguments it does not
    accept are then matched against:

    * ``-outputpage:<title>`` -- stored as ``checker_bot.outputpage``.
    * ``-exclude-articles:<title>`` -- the wiki page whose content is passed
      to ``load_excluded_articles_from_wiki`` to fill the iterator's
      ``excluded_articles``.

    After iterating, the summary is produced only when the iterator still
    holds the full category set. It is saved below
    ``<outputpage>/Zusammenfassung`` when an output page was given, and
    printed together with the config table otherwise.

    Args:
        *args: Command-line style arguments forwarded to
            ``pywikibot.handle_args``.
    """
    site = pywikibot.Site()
    category_fetcher = CategoryFetcher(site)
    template_checker = TemplateChecker()
    template_checker.load_config("config/templates.json")
    bot = CheckerBot(template_checker, site)
    every_category = category_fetcher.get_categories()
    iterator = ArticleIterator(
        ArticleIteratorCallbacks(
            category_callback=bot.cb_store_category_result,
            article_callback=bot.cb_check_article,
            logging_callback=pywikibot.log,
        ),
        categories=every_category,
    )
    argument_parser = ArticleIteratorArgumentParser(iterator, category_fetcher)

    outputpage_prefix = "-outputpage:"
    exclude_prefix = "-exclude-articles:"
    for argument in pywikibot.handle_args(list(args)):
        # The iterator's own parser gets the first chance at every argument.
        if argument_parser.check_argument(argument):
            continue
        if argument.startswith(outputpage_prefix):
            bot.outputpage = argument[len(outputpage_prefix):]
        elif argument.startswith(exclude_prefix):
            exclusion_page = pywikibot.Page(site, argument[len(exclude_prefix):])
            iterator.excluded_articles = load_excluded_articles_from_wiki(exclusion_page)
    iterator.iterate_categories()

    # A partial crawl (only single categories) must not overwrite the summary.
    if iterator.categories != every_category:
        return

    summary = bot.generate_summary_page()
    if bot.outputpage:
        bot.save_wikipage(summary, bot.outputpage + u"/Zusammenfassung")
        return

    # No output page configured: print the report instead.
    pywikibot.output(u"Zusammenfassung")
    pywikibot.output(u"===============")
    pywikibot.output(summary)
    pywikibot.output(bot.generate_config_table())
Example #3
0
def main():
    """Run ``sample_county`` for every category from ``CategoryFetcher``.

    Each category is passed to ``sample_county`` together with a fixed sample
    size of 100 and a UTF-8 stream writer wrapped around standard output.
    """
    sample_size = 100
    output_destination = codecs.getwriter('utf8')(sys.stdout)

    fetcher = CategoryFetcher(pywikibot.Site())
    for county in fetcher.get_categories():
        sample_county(county, sample_size, output_destination)