コード例 #1
0
 def test_NamespaceFilterPageGenerator(self):
     """Check how many of the test titles survive each namespace filter."""
     self.assertFunction("NamespaceFilterPageGenerator")
     # Pairs of (namespaces argument, expected number of pages kept).
     expectations = (
         (0, 3),
         (1, 4),
         (10, 6),
         ((1, 10), 10),
     )
     for namespaces, expected_count in expectations:
         titles_gen = pagegenerators.PagesFromTitlesGenerator(
             self.titles, self.site)
         filtered = pagegenerators.NamespaceFilterPageGenerator(
             titles_gen, namespaces)
         self.assertEqual(len(tuple(filtered)), expected_count)
コード例 #2
0
 def test_NamespaceFilterPageGenerator(self):
     """Check how many of the test titles survive each namespace filter.

     assertEqual is used instead of assertTrue(a == b) so that a failing
     run reports the actual and the expected page count.
     """
     self.assertFunction("NamespaceFilterPageGenerator")
     # Namespace 0 only.
     gen = pagegenerators.PagesFromTitlesGenerator(self.titles)
     gen = pagegenerators.NamespaceFilterPageGenerator(gen, 0)
     self.assertEqual(len(tuple(gen)), 3)
     # Namespace 1 only.
     gen = pagegenerators.PagesFromTitlesGenerator(self.titles)
     gen = pagegenerators.NamespaceFilterPageGenerator(gen, 1)
     self.assertEqual(len(tuple(gen)), 4)
     # Namespace 10 only.
     gen = pagegenerators.PagesFromTitlesGenerator(self.titles)
     gen = pagegenerators.NamespaceFilterPageGenerator(gen, 10)
     self.assertEqual(len(tuple(gen)), 6)
     # Namespaces 1 and 10 combined.
     gen = pagegenerators.PagesFromTitlesGenerator(self.titles)
     gen = pagegenerators.NamespaceFilterPageGenerator(gen, (1, 10))
     self.assertEqual(len(tuple(gen)), 10)
コード例 #3
0
ファイル: table2wiki.py プロジェクト: steele2721/pywikibot
def main(*args):
    """
    Parse the command line arguments and run the Table2WikiRobot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    bot_options = {}
    page_gen = None

    local_args = pywikibot.handle_args(args)

    # The factory handles the arguments shared with other scripts, i.e.
    # the ones that select the pages to work on.
    gen_factory = pagegenerators.GeneratorFactory(positional_arg_name='page')

    for arg in local_args:
        option, _, value = arg.partition(':')

        if option == '-xml':
            if value:
                filename = value
            else:
                filename = pywikibot.input(
                    "Please enter the XML dump's filename:")
            page_gen = TableXmlDumpPageGenerator(filename)
            continue

        if option == '-auto':
            # Deprecated spelling of -always.
            issue_deprecation_warning(
                'The usage of "-auto"', '-always',
                1, ArgumentDeprecationWarning, since='20170205')
            bot_options['always'] = True
            continue

        if option in ['-always', '-quiet', '-skipwarning']:
            # Strip the leading dash to obtain the option name.
            bot_options[option[1:]] = True
            continue

        if option in ('-sql', '-mysqlquery'):
            if option == '-sql':
                # Deprecated spelling of -mysqlquery.
                issue_deprecation_warning(
                    'The usage of "-sql"', '-mysqlquery',
                    1, ArgumentDeprecationWarning, since='20170205')

            # Use the default query when none was given on the command line.
            query = value or """
SELECT page_namespace, page_title
FROM page JOIN text ON (page_id = old_id)
WHERE old_text LIKE '%<table%'
"""
            arg = '-mysqlquery:' + query

        # Everything not consumed above goes to the generator factory.
        gen_factory.handleArg(arg)

    if page_gen:
        page_gen = pagegenerators.NamespaceFilterPageGenerator(
            page_gen, gen_factory.namespaces)
    else:
        page_gen = gen_factory.getCombinedGenerator()

    if not page_gen:
        suggest_help(missing_generator=True)
        return

    if not gen_factory.nopreload:
        page_gen = pagegenerators.PreloadingGenerator(page_gen)
    bot = Table2WikiRobot(generator=page_gen, **bot_options)
    bot.run()
コード例 #4
0
    def __init__(self, configurl=u'https://raw.githubusercontent.com/wikimedia/labs-tools-heritage/master/erfgoedbot/monuments_config/nl_nl.json'):
        """
        Load the monuments configuration and set up the page generator.

        @param configurl: URL of the JSON monuments configuration to fetch
        """
        response = requests.get(configurl)
        self.monumentsconfig = response.json()
        config = self.monumentsconfig

        self.site = pywikibot.Site(config.get(u'lang'),
                                   config.get(u'project'))
        self.repo = self.site.data_repository()

        # Source name of the field whose destination is 'wd_item'; when
        # several fields match, the last one wins.
        self.wikidatafield = None
        for field in config.get('fields'):
            if field.get(u'dest') == 'wd_item':
                self.wikidatafield = field.get(u'source')

        self.property = u'P359'
        self.designation = u'Q916333'
        self.monuments = self.getMonumentsOnWikidata(self.property,
                                                     self.designation)

        # Pages in the configured namespaces that transclude the row
        # template.
        template_title = '{0}:{1}'.format(self.site.namespace(10),
                                          config.get('rowTemplate'))
        row_template = pywikibot.Page(self.site, template_title)
        transclusions = pagegenerators.ReferringPageGenerator(
            row_template, onlyTemplateInclusion=True)
        self.generator = pagegenerators.NamespaceFilterPageGenerator(
            transclusions, config.get('namespaces'), site=self.site)
コード例 #5
0
def getOorlogsmonumentenDataGenerator():
    """
    Yield the parameters of each war monument row on the Dutch Wikipedia.

    Iterates over the namespace 0 pages that transclude
    'Template:Tabelrij oorlogsmonument Nederland', see
    https://nl.wikipedia.org/w/index.php?title=Speciaal:VerwijzingenNaarHier/Sjabloon:Tabelrij_oorlogsmonument_Nederland&namespace=0&limit=500

    For every use of that template on a page, a dict is yielded that maps
    the stripped parameter name to the text following the first '='
    (the value itself is not stripped).
    """

    site = pywikibot.Site('nl', 'wikipedia')

    row_template = pywikibot.Page(
        site, 'Template:Tabelrij oorlogsmonument Nederland')

    trans_gen = pagegenerators.ReferringPageGenerator(
        row_template, onlyTemplateInclusion=True)
    filtered_gen = pagegenerators.NamespaceFilterPageGenerator(trans_gen, [0],
                                                               site=site)

    for page in filtered_gen:
        # The parenthesised form prints the same on Python 2 and is also
        # valid syntax on Python 3.
        print(page.title())
        for (template, params) in page.templatesWithParams():
            if template.title(
                    with_ns=False) != u'Tabelrij oorlogsmonument Nederland':
                # Some other template on the page.
                continue

            metadata = {}
            for param in params:
                (field, _, value) = param.partition(u'=')
                # Remove leading or trailing spaces from the field name.
                metadata[field.strip()] = value
            yield metadata
コード例 #6
0
 def __init__(self, pageToUnlink, namespaces, always):
     """
     Set up the generator and link pattern for unlinking a page.

     @param pageToUnlink: page whose incoming links are worked on
     @param namespaces: namespaces to restrict the referring pages to;
         an empty list keeps all of them
     @param always: stored on the instance as self.always
     """
     self.pageToUnlink = pageToUnlink

     referrers = pagegenerators.ReferringPageGenerator(pageToUnlink)
     if namespaces != []:
         referrers = pagegenerators.NamespaceFilterPageGenerator(
             referrers, namespaces)
     self.generator = pagegenerators.PreloadingGenerator(referrers)

     linktrail = pywikibot.getSite().linktrail()
     # Pattern that finds wiki links. It is assembled from four parts:
     link_pattern = (
         # title: the target page title, everything before | or ].
         r'\[\[(?P<title>[^\]\|#]*)'
         # section: the page section, including the leading #.
         r'(?P<section>#[^\]\|]*)?'
         # label: the alternative link title, everything between | and ].
         r'(\|(?P<label>[^\]]*))?'
         # linktrail: letters after ]] which are part of the word; what
         # counts as a letter varies from language to language.
         r'\]\](?P<linktrail>%s)'
     ) % linktrail
     self.linkR = re.compile(link_pattern)

     self.always = always
     self.done = False
     self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                     self.pageToUnlink.title())
コード例 #7
0
def main():
    """Parse the command line and call workon() for every selected page.

    With -featured, the pages are the namespace 0 pages referring to the
    translated featured articles page; otherwise they come from the
    generator factory. The script exits early on the Dutch Wikipedia.
    """
    only_featured = False
    page_gen = None

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-featured':
            only_featured = True
            continue
        genFactory.handleArg(arg)

    mysite = pywikibot.Site()
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}'
        )
        sys.exit()

    if only_featured:
        featured_title = i18n.translate(mysite, featured_articles)
        featured_page = pywikibot.Page(pywikibot.Site(), featured_title)
        referrers = pagegenerators.ReferringPageGenerator(featured_page)
        page_gen = pagegenerators.NamespaceFilterPageGenerator(referrers, [0])

    if not page_gen:
        page_gen = genFactory.getCombinedGenerator()
    if not page_gen:
        pywikibot.showHelp('fixing_redirects')
        return

    for page in pagegenerators.PreloadingGenerator(page_gen):
        workon(page)
コード例 #8
0
def main():
    """
    Call workon() for the articles of the wiki's disambiguation category.

    All command line arguments left over by pywikibot.handleArgs() are
    joined with spaces and used as the start title; "!" is used when there
    are none. Redirect pages are skipped. Pages are collected together with
    their linked pages and handed to workon() in batches, once more than
    49 linked pages have accumulated.

    Raises pywikibot.NoPage (after printing a hint) when that is raised
    while building the category generator.
    """
    global mysite, linktrail, page
    local_args = list(pywikibot.handleArgs())
    start = " ".join(local_args) if local_args else "!"

    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    try:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    except pywikibot.NoPage:
        pywikibot.output(
            "The bot does not know the disambiguation category for your wiki.")
        raise
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            # NOTE(review): the generator created here is never iterated,
            # so presumably nothing is preloaded - confirm.
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []

    # Flush the final, partially filled batch; without this the pages
    # collected after the last full batch would never be worked on.
    for page, links in pagestodo:
        workon(page, links)
コード例 #9
0
    def __init__(self, pageToUnlink, **kwargs):
        """
        Set up the generator and link pattern for unlinking a page.

        @param pageToUnlink: page whose incoming links are worked on
        @param kwargs: bot options, passed on to the parent constructor
        """
        # 'namespaces' selects the namespaces to process; the default []
        # means all namespaces will be processed.
        self.availableOptions.update({'namespaces': []})

        super(UnlinkBot, self).__init__(**kwargs)
        self.pageToUnlink = pageToUnlink
        site = self.pageToUnlink.site
        linktrail = site.linktrail()

        referrers = pagegenerators.ReferringPageGenerator(pageToUnlink)
        wanted_namespaces = self.getOption('namespaces')
        if wanted_namespaces != []:
            referrers = pagegenerators.NamespaceFilterPageGenerator(
                referrers, wanted_namespaces)
        self.generator = pagegenerators.PreloadingGenerator(referrers)

        # Pattern that finds wiki links. It is assembled from four parts:
        link_pattern = (
            # title: the target page title, everything before | or ].
            r'\[\[(?P<title>[^\]\|#]*)'
            # section: the page section, including the leading #.
            r'(?P<section>#[^\]\|]*)?'
            # label: the alternative link title, everything between | and ].
            r'(\|(?P<label>[^\]]*))?'
            # linktrail: letters after ]] which are part of the word; what
            # counts as a letter varies from language to language.
            r'\]\](?P<linktrail>%s)'
        ) % linktrail
        self.linkR = re.compile(link_pattern)

        self.comment = i18n.twtranslate(site, 'unlink-unlinking',
                                        self.pageToUnlink.title())
コード例 #10
0
def main():
    """
    Process the command line arguments and run the NoReferencesBot.

    The pages come from, in order of precedence: page title words given
    on the command line, an -xml dump or the generator factory, and
    finally the wiki's entry in maintenance_category (restricted to
    namespace 0 unless -namespace was given). Help is shown when none of
    these yields a generator.
    """
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare "-xml": ask for the file name.
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                # "-xml:<filename>"
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            # Namespaces may be given by number or by name.
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            # No maintenance category is configured for this wiki. Only
            # the missing-key case is ignored, so unrelated errors are no
            # longer hidden by a bare except.
            pass
        else:
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site,
                                  "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
コード例 #11
0
def get_template_generator(lng, tpl):
    """Return the namespace 0 pages transcluding a template, preloaded.

    lng is the language code of the Wikipedia to query and tpl the
    template title without its namespace prefix.
    """
    site = pwb.Site(lng, "wikipedia")
    template_page = pwb.Page(site, "{}:{}".format(site.namespace(10), tpl))
    transclusions = pg.ReferringPageGenerator(
        template_page, onlyTemplateInclusion=True)
    articles_only = pg.NamespaceFilterPageGenerator(
        transclusions, namespaces=[0])
    return site.preloadpages(articles_only, pageprops=True)
コード例 #12
0
 def test_QualityFilterPageGenerator(self):
     """Check page counts after quality filtering the test titles."""
     site = self.site
     # Quality level 0 is expected to keep 7 pages.
     gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
     gen = pagegenerators.QualityFilterPageGenerator(gen, [0])
     self.assertEqual(len(tuple(gen)), 7)
     # NOTE(review): the namespace-filtered generator built below is
     # overwritten by the next assignment before it is ever consumed, so
     # it is not checked by any assertion. An assertion (and possibly a
     # QualityFilterPageGenerator call) appears to be missing - confirm
     # the intended filter and expected count.
     gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
     gen = pagegenerators.NamespaceFilterPageGenerator(gen, [4])
     # Unfiltered: all 10 titles are expected.
     gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
     self.assertEqual(len(tuple(gen)), 10)
コード例 #13
0
def getCategoryContent(catname):
    """Return a generator over the namespace 0 pages of a category.

    The category is looked up on the module-level site.
    """
    category = pywikibot.Category(site, catname)
    # Build the list of pages contained in the category.
    members = list(category.articles(False))
    preloaded = pagegenerators.PreloadingGenerator(members)
    # Keep only the articles (namespace 0).
    return pagegenerators.NamespaceFilterPageGenerator(preloaded, [0])
コード例 #14
0
def list_template_usage(row_template_name):
    """Return a preloading generator of German Wikipedia namespace 0 pages
    that transclude the given template."""
    site = wikipedia.getSite('de', 'wikipedia')
    template_title = u'%s:%s' % (site.namespace(10), row_template_name)
    template_page = wikipedia.Page(site, template_title)
    transclusions = pagegenerators.ReferringPageGenerator(
        template_page, onlyTemplateInclusion=True)
    return pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(transclusions, [0]))
コード例 #15
0
def generate_transclusions(site, template, namespaces=None):
    """
    Yield the pages that transclude a template.

    @param site: site on which the template is looked up
    @param template: template title; the page is created with ns=10
    @param namespaces: namespaces to restrict the result to. None (the
        default) or any empty collection applies no namespace filter.
        The default is None rather than a shared mutable list; callers
        passing [] or nothing get the same result as before.
    """
    pywikibot.output(u'Fetching template transclusions...')
    transclusion_page = pywikibot.Page(site, template, ns=10)
    gen = pagegenerators.ReferringPageGenerator(transclusion_page,
                                                onlyTemplateInclusion=True)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(
            gen, namespaces, site)
    for page in gen:
        yield page
コード例 #16
0
 def test_NamespaceFilterPageGenerator(self):
     """Check how many of the test titles survive each namespace filter."""
     self.assertFunction("NamespaceFilterPageGenerator")
     site = self.site
     # Pairs of (namespaces argument, expected number of pages kept);
     # namespaces are given as a number, a tuple of numbers or names.
     expectations = (
         (0, 3),
         (1, 4),
         (10, 6),
         ((1, 10), 10),
         (('Talk', 'Template'), 10),
     )
     for namespaces, expected_count in expectations:
         titles_gen = pagegenerators.PagesFromTitlesGenerator(
             self.titles, site)
         filtered = pagegenerators.NamespaceFilterPageGenerator(
             titles_gen, namespaces, site)
         self.assertEqual(len(tuple(filtered)), expected_count)
コード例 #17
0
def main():
    """Run the RKDImagesImporter on the namespace 6 pages of Commons that
    transclude Template:RKDimages."""
    commonssite = pywikibot.Site(u'commons', u'commons')
    templatepage = pywikibot.Page(commonssite, title=u'Template:RKDimages')

    transclusions = pagegenerators.ReferringPageGenerator(
        templatepage, onlyTemplateInclusion=True)
    in_namespace_6 = pagegenerators.NamespaceFilterPageGenerator(
        transclusions, 6)
    preloaded = pagegenerators.PreloadingGenerator(in_namespace_6)

    importer = RKDImagesImporter(preloaded)
    importer.run()
コード例 #18
0
ファイル: commonscat.py プロジェクト: gladys123/pywikibot
def main(*args):
    """
    Parse the command line arguments and run the CommonscatBot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    generator = None
    checkcurrent = False
    # Namespace used to filter the -checkcurrent generator.
    ns = [14]

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare "-summary": ask for the text.
                summary = pywikibot.input(
                    u'What summary do you want to use?')
            else:
                # "-summary:<text>"
                summary = arg[9:]
            options['summary'] = summary
            continue
        if arg.startswith('-checkcurrent'):
            checkcurrent = True
            continue
        if arg == '-always':
            options['always'] = True
            continue
        genFactory.handleArg(arg)

    if checkcurrent:
        # Work on the pages that transclude the site's primary commonscat
        # template.
        primary_template, _alternatives = \
            CommonscatBot.getCommonscatTemplate(pywikibot.Site().code)
        template_page = pywikibot.Page(
            pywikibot.Site(), u'Template:' + primary_template)
        transclusions = pagegenerators.ReferringPageGenerator(
            template_page, onlyTemplateInclusion=True)
        generator = pagegenerators.NamespaceFilterPageGenerator(
            transclusions, ns)

    if not generator:
        generator = genFactory.getCombinedGenerator()

    if not generator:
        pywikibot.showHelp()
        return

    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, **options)
    bot.run()
コード例 #19
0
 def _templates_generator(self):
     """Yield namespace 10 pages transcluding the source template.

     A page whose title ends in "/doc" is replaced by the page without
     that suffix when the latter exists. The resulting template is
     yielded unless it equals the source template, followed by the
     redirects pointing to it.
     """
     transclusions = pagegenerators.ReferringPageGenerator(
         self.source_template, onlyTemplateInclusion=True)
     in_namespace_10 = pagegenerators.NamespaceFilterPageGenerator(
         transclusions, [10])
     for page in in_namespace_10:
         template = page
         if template.title().endswith("/doc"):
             # Strip the four characters of "/doc" to get the parent title.
             parent = pywikibot.Page(self.site, template.title()[:-4])
             if parent.exists():
                 template = parent
         if template != self.source_template:
             yield template
         redirects = template.getReferences(redirectsOnly=True,
                                            withTemplateInclusion=False)
         for redirect in redirects:
             yield redirect
コード例 #20
0
ファイル: claim_templates.py プロジェクト: jjmhtp/toollabs
    def __init__(self, lang, noclaims, templateclaims):
        """
        Set up the sites, the page generator and the template claims.

        Arguments:
            * lang           - The language code of the Wikipedia we're working on
            * noclaims       - The title of the page on Wikidata with the list of pages to work on
            * templateclaims - The title of the page on Wikipedia with the template claims

        """
        self.lang = lang
        self.site = pywikibot.Site(self.lang, u'wikipedia')
        self.repo = self.site.data_repository()

        # Preload the namespace 0 pages listed by the noclaims generator.
        noclaim_pages = self.getNoclaimGenerator(noclaims)
        articles_only = pg.NamespaceFilterPageGenerator(noclaim_pages, 0)
        self.generator = pg.PreloadingGenerator(articles_only)

        self.templateclaims = templateclaims
        self.templates = self.getTemplateClaims(templateclaims)
コード例 #21
0
def main():
    """Parse the command line arguments and run the IsbnBot.

    Pages come from the title words given on the command line or, failing
    that, from the generator factory; help is shown when neither yields
    a generator.
    """
    # page generator
    gen = None
    # Collects the words of page titles given directly as arguments.
    pageTitle = []
    # Namespaces to process; an empty list means all of them.
    namespaces = []
    # Handles the arguments shared with other scripts that select the
    # pages to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Bot switches, all off by default.
    always = False
    to13 = False
    reformat = False

    for arg in pywikibot.handleArgs():
        if arg.startswith('-namespace:'):
            # Namespaces may be given by number or by name.
            ns_value = arg[11:]
            try:
                namespaces.append(int(ns_value))
            except ValueError:
                namespaces.append(ns_value)
            continue
        if arg == '-always':
            always = True
            continue
        if arg == '-to13':
            to13 = True
            continue
        if arg == '-format':
            reformat = True
            continue
        if not genFactory.handleArg(arg):
            pageTitle.append(arg)

    site = pywikibot.getSite()
    site.login()
    if pageTitle:
        title_pages = [pywikibot.Page(pywikibot.Link(t, site))
                       for t in pageTitle]
        gen = iter(title_pages)
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('isbn')
        return

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = IsbnBot(preloadingGen, to13=to13, format=reformat, always=always)
    bot.run()
コード例 #22
0
def list_template_usage(site_obj, tmpl_name):
    """
    Return a generator of the namespace 0 pages that use a template.

    site_obj is a Site object (pywikibot.Site()) and tmpl_name the
    template name (String) without its namespace prefix. The pages
    transcluding that template are filtered to namespace 0 and passed to
    site_obj.preloadpages() with pageprops=True.
    """
    template_title = "{}:{}".format(site_obj.namespace(10), tmpl_name)
    template_page = pywikibot.Page(site_obj, template_title)
    transclusions = pg.ReferringPageGenerator(
        template_page, onlyTemplateInclusion=True)
    articles_only = pg.NamespaceFilterPageGenerator(
        transclusions, namespaces=[0])
    return site_obj.preloadpages(articles_only, pageprops=True)
コード例 #23
0
ファイル: templatecount.py プロジェクト: gladys123/pywikibot
 def template_dict_generator(templates, namespaces):
     """Yield (template, pages) pairs for the given template names.

     pages is the list of pages transcluding the template, restricted to
     namespaces when that argument is not empty.
     """
     mysite = pywikibot.Site()
     template_ns = mysite.getNamespaceIndex(mysite.template_namespace())
     for template in templates:
         template_page = pywikibot.Page(mysite, template, ns=template_ns)
         transclusions = pagegenerators.ReferringPageGenerator(
             template_page, onlyTemplateInclusion=True)
         if namespaces:
             transclusions = pagegenerators.NamespaceFilterPageGenerator(
                 transclusions, namespaces)
         yield template, list(transclusions)
コード例 #24
0
ファイル: nowcommons.py プロジェクト: anrao91/pywikibot-core
 def getPageGenerator(self):
     """Return the generator of pages to work on.

     With the 'use_hash' option the hash based generator is returned.
     Otherwise the namespace 6 pages transcluding any of the templates
     from ncTemplates() are combined, de-duplicated and preloaded.
     """
     if self.getOption('use_hash'):
         return self.useHashGenerator()

     template_pages = [pywikibot.Page(self.site, title, ns=10)
                       for title in self.ncTemplates()]
     per_template = [
         pg.ReferringPageGenerator(template_page, followRedirects=True,
                                   onlyTemplateInclusion=True)
         for template_page in template_pages
     ]
     combined = pg.CombinedPageGenerator(per_template)
     in_namespace_6 = pg.NamespaceFilterPageGenerator(combined, [6])
     unique = pg.DuplicateFilterPageGenerator(in_namespace_6)
     return pg.PreloadingGenerator(unique)
コード例 #25
0
ファイル: commonscat.py プロジェクト: ImanYZ/ExpertIdeas
def main():
    """Parse the command line arguments and run the CommonscatBot.

    Raises add_text.NoEnoughData when neither -checkcurrent nor the
    generator factory provides a page generator.
    """
    summary = None
    generator = None
    checkcurrent = False
    always = False
    # Namespace used to filter the -checkcurrent generator.
    ns = [14]

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare "-summary": ask for the text.
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                # "-summary:<text>"
                summary = arg[9:]
            continue
        if arg.startswith('-checkcurrent'):
            checkcurrent = True
            continue
        if arg == '-always':
            always = True
            continue
        genFactory.handleArg(arg)

    if checkcurrent:
        # Work on the pages that transclude the site's primary commonscat
        # template.
        primary_template, _alternatives = \
            CommonscatBot.getCommonscatTemplate(pywikibot.Site().code)
        template_page = pywikibot.Page(
            pywikibot.Site(), u'Template:' + primary_template)
        transclusions = pagegenerators.ReferringPageGenerator(
            template_page, onlyTemplateInclusion=True)
        generator = pagegenerators.NamespaceFilterPageGenerator(
            transclusions, ns)

    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(u'You have to specify the generator you '
                                    u'want to use for the script!')

    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, always, summary)
    bot.run()
コード例 #26
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The first remaining argument is used as the start title of the
    disambiguation category ('!' when there is none). Redirect pages are
    skipped; the other pages are collected together with their linked
    pages and handed to workon() in batches, once more than 49 linked
    pages have accumulated.

    @param args: command line arguments
    @type args: list of unicode
    """
    local_args = pywikibot.handle_args(args)

    generator = None
    start = local_args[0] if local_args else '!'

    mysite = pywikibot.Site()
    try:
        mysite.disambcategory()
    except pywikibot.Error as e:
        pywikibot.output(e)
    else:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)

    if not generator:
        pywikibot.showHelp()
        return

    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            # NOTE(review): the generator created here is never iterated,
            # so presumably nothing is preloaded - confirm.
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []

    # Flush the final, partially filled batch; without this the pages
    # collected after the last full batch would never be worked on.
    for page, links in pagestodo:
        workon(page, links)
コード例 #27
0
def crawlerCat(category, delay, subcat=False, checkTalk=False):
    """Run removeTemplate() on a category and, optionally, its subcategories.

    The namespace 0 pages of the category are passed to removeTemplate()
    together with the category title, delay and checkTalk. When subcat is
    truthy, the subcategories are processed recursively with subcat
    reduced by one at each level. The concatenated results are returned.
    """
    log = u''
    cat = pywikibot.Category(site, category)
    # Build the list of pages contained in the category.
    members = list(cat.articles(False))
    preloaded = pagegenerators.PreloadingGenerator(members)
    # Keep only the articles (namespace 0).
    articles = pagegenerators.NamespaceFilterPageGenerator(preloaded, [0])
    log += removeTemplate(articles, cat.title(withNamespace=False), delay,
                          checkTalk)

    if not subcat:
        return log

    remaining_depth = subcat - 1
    for child in list(cat.subcategories()):
        log += crawlerCat(child.title(withNamespace=False), delay,
                          remaining_depth, checkTalk)

    return log
コード例 #28
0
def main(*args):
    '''
    Add {{Interwiki from wikidata}} to the selected namespace 14 pages.

    Pages that do not exist, or whose text already contains one of the
    skip templates (compared case-insensitively), are left alone.
    Returns False when no generator was specified.
    '''

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        genFactory.handleArg(arg)
    gen = genFactory.getCombinedGenerator(preload=True)
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    skiptemplates = [
        u'Interwiki from wikidata', u'On Wikidata', u'Countries of Europe',
        u'VN'
    ]

    for page in pagegenerators.NamespaceFilterPageGenerator(gen, 14):
        pywikibot.output(u'Working on %s' % (page.title(), ))
        if not page.exists():
            pywikibot.output(u'Page does not exist')
            continue

        text = page.get()
        lowered_text = text.lower()
        already_tagged = any(
            u'{{%s' % (name.lower(), ) in lowered_text
            for name in skiptemplates)
        if already_tagged:
            pywikibot.output(u'Already has the template')
            continue

        newtext = u'{{Interwiki from wikidata}}\n' + text
        summary = u'Adding {{Interwiki from wikidata}} to get links to Wikipedia articles'
        pywikibot.showDiff(text, newtext)
        page.put(newtext, summary=summary)
コード例 #29
0
def main():
    """
    Call workon() for the articles of the wiki's disambiguation category.

    The first argument left over by pywikibot.handleArgs() is used as the
    start title ('!' when there is none). Help is shown when
    disambcategory() raises pywikibot.Error. Redirect pages are skipped;
    the other pages are collected together with their linked pages and
    handed to workon() in batches, once more than 49 linked pages have
    accumulated.
    """
    local_args = pywikibot.handleArgs()

    generator = None
    start = local_args[0] if local_args else '!'

    mysite = pywikibot.Site()
    try:
        mysite.disambcategory()
    except pywikibot.Error as e:
        pywikibot.output(e)
    else:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)

    if not generator:
        pywikibot.showHelp()
        return

    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            # NOTE(review): the generator created here is never iterated,
            # so presumably nothing is preloaded - confirm.
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []

    # Flush the final, partially filled batch; without this the pages
    # collected after the last full batch would never be worked on.
    for page, links in pagestodo:
        workon(page, links)
コード例 #30
0
ファイル: table2wiki.py プロジェクト: cllu/pywikibot-core
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The pages to work on come from, in order of precedence: page title
    words given on the command line, an -xml dump or the -sql query, and
    finally the generator factory. Help is shown when none of these
    yields a generator.

    @param args: command line arguments
    @type args: list of unicode
    """
    quietMode = False  # use -quiet to get less output
    # if the -file argument is used, page titles are stored in this array.
    # otherwise it will only contain one page.
    articles = []
    # if -file is not used, this temporary array is used to read the page title.
    page_title = []

    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []

    xmlfilename = None
    gen = None

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    # The branches below are tested in order and several of them use
    # prefix matches, so their order matters.
    for arg in pywikibot.handle_args(args):
        if arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare "-xml": ask for the file name.
                xmlfilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                # "-xml:<filename>"
                xmlfilename = arg[5:]
            gen = TableXmlDumpPageGenerator(xmlfilename)
        elif arg == '-sql':
            # Fixed query for pages whose text contains "<table".
            query = u"""
SELECT page_namespace, page_title
FROM page JOIN text ON (page_id = old_id)
WHERE old_text LIKE '%<table%'
LIMIT 200"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            # Namespaces may be given by number or by name.
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg.startswith('-skip:'):
            # NOTE(review): nothing in this function ever adds to
            # articles, so index() raises ValueError for any -skip value;
            # this looks like a leftover from the -file handling - confirm.
            articles = articles[articles.index(arg[6:]):]
        elif arg.startswith('-auto'):
            config.table2wikiAskOnlyWarnings = True
            config.table2wikiSkipWarnings = True
            pywikibot.output('Automatic mode!\n')
        elif arg.startswith('-quiet'):
            quietMode = True
        else:
            # Arguments the factory does not handle are collected as
            # page title words.
            if not genFactory.handleArg(arg):
                page_title.append(arg)

    # if the page is given as a command line argument,
    # connect the title's parts with spaces
    if page_title != []:
        page_title = ' '.join(page_title)
        page = pywikibot.Page(pywikibot.Site(), page_title)
        gen = iter([page])

    if not gen:
        gen = genFactory.getCombinedGenerator()

    if gen:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = Table2WikiRobot(preloadingGen, quietMode)
        bot.run()
    else:
        pywikibot.showHelp('table2wiki')