Example #1
0
def crawlerLink(pagename):
	"""Run modification() on every page that links to *pagename*."""
	target = wikipedia.Page(site, pagename)
	referrers = pagegenerators.ReferringPageGenerator(target)
	preloaded = pagegenerators.PreloadingGenerator(referrers, 100)
	for linked in preloaded:
		modification(linked.title())
def main():
    """Parse command line arguments and fix redirects on selected pages."""
    featured = False
    gen = None

    # Factory that handles the page-selection command line options
    # shared with other scripts.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg == '-featured':
            featured = True
        else:
            genFactory.handleArg(arg)

    mysite = pywikibot.getSite()
    # The Dutch Wikipedia community forbids bot-fixing of redirects.
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}'
        )
        sys.exit()

    if featured:
        listTitle = pywikibot.translate(mysite, featured_articles)
        refPage = pywikibot.Page(pywikibot.getSite(), listTitle)
        refGen = pagegenerators.ReferringPageGenerator(refPage)
        gen = pagegenerators.NamespaceFilterPageGenerator(refGen, [0])
    if gen is None:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('fixing_redirects')
        return
    for page in pagegenerators.PreloadingGenerator(gen):
        workon(page)
 def countTemplates(self, templates, namespaces):
     """Count transclusions of each template and print a report.

     @param templates: template names (without the namespace prefix)
     @param namespaces: namespaces to restrict counting to; falsy means
         all namespaces
     @return: dict mapping template name to its transclusion count
     """
     mysite = wikipedia.getSite()
     mytpl = mysite.template_namespace() + ':'
     finalText = [
         u'Number of transclusions per template',
         u'------------------------------------'
     ]
     total = 0
     # The names of the templates are the keys, and the numbers of transclusions are the values.
     templateDict = {}
     for template in templates:
         gen = pagegenerators.ReferringPageGenerator(
             wikipedia.Page(mysite, mytpl + template),
             onlyTemplateInclusion=True)
         if namespaces:
             gen = pagegenerators.NamespaceFilterPageGenerator(
                 gen, namespaces)
         # Consume the generator with sum() instead of a manual counter.
         count = sum(1 for _ in gen)
         templateDict[template] = count
         finalText.append(u'%s: %d' % (template, count))
         total += count
     for line in finalText:
         wikipedia.output(line, toStdout=True)
     wikipedia.output(u'TOTAL: %d' % total, toStdout=True)
     wikipedia.output(u'Report generated on %s' %
                      datetime.datetime.utcnow().isoformat(),
                      toStdout=True)
     return templateDict
Example #4
0
 def __init__(self, pageToUnlink, namespaces, always):
     """Set up the unlink bot for *pageToUnlink*.

     Builds a preloading generator over the pages referring to the
     target and compiles the link-matching regular expression.
     """
     self.pageToUnlink = pageToUnlink
     gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
     if namespaces != []:
         gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
     self.generator = pagegenerators.PreloadingGenerator(gen)
     trail = pywikibot.getSite().linktrail()
     # Link-matching regex with four named groups:
     #   title     - target page title (everything before | or ])
     #   section   - page section, including the leading # for convenience
     #   label     - alternative link title (everything between | and ])
     #   linktrail - letters after ]] that are part of the word; the
     #               definition of 'letter' varies per language.
     self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
                             % trail)
     self.always = always
     self.done = False
     self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                     self.pageToUnlink.title())
Example #5
0
def main():
    """Parse command line arguments and run the harvesting bot.

    Expects a -template option plus pairs of
    template-parameter/property arguments.

    @raise ValueError: when no template is given or the remaining
        arguments do not form pairs
    """
    genFactory = pagegenerators.GeneratorFactory()
    commandline_arguments = []
    templateTitle = u''
    for arg in pywikibot.handleArgs():
        if arg.startswith('-template'):
            if len(arg) == 9:
                templateTitle = pywikibot.input(
                    u'Please enter the template to work on:')
            else:
                templateTitle = arg[10:]
        elif genFactory.handleArg(arg):
            continue
        else:
            commandline_arguments.append(arg)

    if len(commandline_arguments) % 2 or not templateTitle:
        raise ValueError(
            u'-template and an even number of parameter/property '
            u'arguments are required')

    # Pair up (template parameter, property) arguments.
    # zip over even/odd slices is portable (Python 2's xrange is gone
    # in Python 3) and behaviorally identical to the old index loop.
    fields = dict(zip(commandline_arguments[0::2],
                      commandline_arguments[1::2]))
    if templateTitle:
        gen = pagegenerators.ReferringPageGenerator(pywikibot.Page(
            pywikibot.getSite(), "Template:%s" % templateTitle),
                                                    onlyTemplateInclusion=True)
    else:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # TODO: Build a transcluding generator based on templateTitle
        return

    bot = HarvestRobot(gen, templateTitle, fields)
    bot.run()
 def listTemplates(self, templates, namespaces):
     """Print all pages transcluding the given templates.

     Returns a dict mapping each template name to the list of pages
     transcluding it.
     """
     mysite = wikipedia.getSite()
     count = 0
     # Template names are the keys; lists of transcluding pages the values.
     templateDict = {}
     finalText = [u'List of pages transcluding templates:']
     finalText.extend(u'* %s' % template for template in templates)
     finalText.append(u'------------------------------------')
     for template in templates:
         gen = pagegenerators.ReferringPageGenerator(
             wikipedia.Page(mysite,
                            mysite.template_namespace() + ':' + template),
             onlyTemplateInclusion=True)
         if namespaces:
             gen = pagegenerators.NamespaceFilterPageGenerator(
                 gen, namespaces)
         transcluding = list(gen)
         finalText.extend(u'%s' % page.title() for page in transcluding)
         count += len(transcluding)
         templateDict[template] = transcluding
     finalText.append(u'Total page count: %d' % count)
     for line in finalText:
         wikipedia.output(line, toStdout=True)
     wikipedia.output(u'Report generated on %s' %
                      datetime.datetime.utcnow().isoformat(),
                      toStdout=True)
     return templateDict
Example #7
0
    def __init__(self):
        '''Constructor of SubsterBot(), initialize needed vars.'''

        pywikibot.output(
            u'\03{lightgreen}* Initialization of bot:\03{default}')

        basic.AutoBasicBot.__init__(self)

        # modification of timezone to be in sync with wiki
        os.environ['TZ'] = 'Europe/Amsterdam'
        if hasattr(time, "tzset"):
            time.tzset()
            pywikibot.output(u'Setting process TimeZone (TZ): %s' %
                             str(time.tzname))  # ('CET', 'CEST')
        else:
            # e.g. windows doesn't have that attribute
            pywikibot.warning(
                u'This operating system has NO SUPPORT for setting TimeZone by '
                u'code! Before running this script, please set the TimeZone '
                u'manually to one approriate for use with the Wikipedia '
                u'language and region you intend to.')

        # init constants
        self._bot_config = bot_config
        # convert e.g. namespaces to correct language
        self._bot_config['TemplateName'] = pywikibot.Page(
            self.site, self._bot_config['TemplateName']).title()
        # Raw strings: '\{' relied on an unrecognized escape staying a
        # literal backslash, which newer Pythons warn about; r'...'
        # spells the identical pattern unambiguously.
        self._template_regex = re.compile(
            r'\{\{' + self._bot_config['TemplateName'] + r'(.*?)\}\}', re.S)
        # TODO: implement proper error handling template/output for wikidata
        #       see: https://bugzilla.wikimedia.org/show_bug.cgi?id=60225
        #       see: https://www.wikidata.org/wiki/Template:Exchange_Rate_Data
        #if self.site.is_data_repository():
        #    self._bot_config['VerboseMessage'] = self._bot_config['data_VerboseMessage']

        # init constants
        self._userListPage = pywikibot.Page(self.site,
                                            self._bot_config['TemplateName'])
        self._ConfCSSpostprocPage = pywikibot.Page(
            self.site, self._bot_config['ConfCSSpostproc'])
        self._ConfCSSconfigPage = pywikibot.Page(
            self.site, self._bot_config['ConfCSSconfig'])
        self.pagegen = pagegenerators.ReferringPageGenerator(
            self._userListPage, onlyTemplateInclusion=True)
        self._code = self._ConfCSSpostprocPage.get()
        pywikibot.output(
            u'Imported postproc %s rev %s from %s' %
            ((self._ConfCSSpostprocPage.title(asLink=True), ) +
             self._ConfCSSpostprocPage.getVersionHistory(revCount=1)[0][:2]))
        self._flagenable = {}
        if self._ConfCSSconfigPage.exists():
            # SECURITY: this executes Python source fetched from a wiki
            # page; only safe while that page is fully protected and
            # maintained by trusted users.
            exec(self._ConfCSSconfigPage.get()
                 )  # with variable: bot_config_wiki
            self._flagenable = bot_config_wiki['flagenable']
            pywikibot.output(
                u'Imported config %s rev %s from %s' %
                ((self._ConfCSSconfigPage.title(asLink=True), ) +
                 self._ConfCSSconfigPage.getVersionHistory(revCount=1)[0][:2]))
def generateTransclusions(Site, template, namespaces=None):
    """Yield the pages transcluding *template* on *Site*.

    @param Site: the wiki site to query
    @param template: template title (namespace defaults to Template:)
    @param namespaces: optional list of namespaces to restrict to;
        None (the new default, replacing a mutable-default list) or an
        empty list means no restriction
    """
    pywikibot.output(u'Fetching template transclusions...')
    transclusionPage = pywikibot.Page(Site, template, defaultNamespace=10)
    gen = pagegenerators.ReferringPageGenerator(transclusionPage,
                                                onlyTemplateInclusion=True)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces, Site)
    for page in gen:
        yield page
 def getPageGenerator(self):
     """Return the generator of candidate pages (file namespace only)."""
     if use_hash:
         return self.useHashGenerator()
     referring = pagegenerators.ReferringPageGenerator(
         self.nowCommonsTemplate,
         followRedirects=True,
         onlyTemplateInclusion=True)
     return pagegenerators.NamespaceFilterPageGenerator(referring, [6])
Example #10
0
def modification(PageHS):
    """Repoint links to redirect page *PageHS* at its target, then
    optionally delete the redirect.

    Only main-namespace pages (or the bot's test page) are processed.
    """
    page = Page(site, PageHS)
    if page.exists():
        if page.namespace() != 0 and page.title(
        ) != u'Utilisateur:JackBot/test':
            return
        else:
            try:
                # Target the redirect points to; links get repointed here.
                PageEnd = page.getRedirectTarget()
            except wikipedia.NoPage:
                print "NoPage"
                return
            gen2 = pagegenerators.ReferringPageGenerator(page)
            for PageCourante in pagegenerators.PreloadingGenerator(gen2, 100):
                print(PageCourante.title().encode(config.console_encoding,
                                                  'replace'))
                try:
                    PageBegin = PageCourante.get()
                except wikipedia.NoPage:
                    print "NoPage"
                    return
                except wikipedia.IsRedirectPage:
                    print "Redirect page"
                    return
                except wikipedia.LockedPage:
                    print "Locked/protected page"
                    return
                except wikipedia.ServerError:
                    print "ServerError"
                    return
                except wikipedia.NoSuchSite:
                    print "NoSuchSite"
                    return
                except wikipedia.InvalidTitle:
                    print "InvalidTitle"
                    return
                PageTemp = PageBegin
                # Rewrite [[PageHS]] as [[Target|PageHS]]: keep text up
                # to and including '[[', splice in the target title and
                # '|', and resume just before the closing ']]'.
                while PageTemp.find(u'[[' + PageHS + u']]') != -1:
                    PageTemp = PageTemp[
                        0:PageTemp.find(u'[[' + PageHS + u']]') +
                        2] + PageEnd.title() + u'|' + PageHS + PageTemp[
                            PageTemp.find(u'[[' + PageHS + u']]') +
                            len(u'[[' + PageHS + u']]') - 2:len(PageTemp)]
                # Rewrite [[PageHS|label]] as [[Target|label]]: replace
                # only the title part, keeping the existing '|label'.
                while PageTemp.find(u'[[' + PageHS + u'|') != -1:
                    PageTemp = PageTemp[
                        0:PageTemp.find(u'[[' + PageHS + u'|') +
                        2] + PageEnd.title(
                        ) + PageTemp[PageTemp.find(u'[[' + PageHS + u'|') +
                                     len(u'[[' + PageHS + u'|') -
                                     1:len(PageTemp)]
                # Save only when something actually changed.
                if PageTemp != PageBegin: sauvegarde(PageCourante, PageTemp)
            # Redirects containing '/' or ' - ' in their title are
            # deleted once their incoming links have been fixed.
            if PageHS.find(u'/') != -1 or PageHS.find(u' - ') != -1:
                page.delete(u'Suppression après gestion des pages liées',
                            u'',
                            throttle=True)
def crawlerLink(pagename, apres):
    """Run modification() on main-namespace pages linking to *pagename*.

    If *apres* is non-empty, referring pages are skipped until the page
    titled *apres* has been seen; processing starts after it.
    """
    # Real boolean instead of the former u'False'/u'True' string sentinel.
    started = False
    #pagename = unicode(arg[len('-links:'):], 'utf-8')
    page = wikipedia.Page(site, pagename)
    gen = pagegenerators.ReferringPageGenerator(page)
    gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
    for linked in pagegenerators.PreloadingGenerator(gen, 100):
        if not apres or apres == u'' or started:
            modification(linked.title())  #crawlerLink(Page.title())
        elif linked.title() == apres:
            started = True
Example #12
0
def main():
    """Archive external links on the configured projects.

    Command line options:
      -always            do not ask for confirmation
      -project:fam:code  override the projects to check
      -test              use the test settings and test page
    """
    gen = None
    acceptall = False
    test = False
    linkingPageTitle = 'Gebruiker:Erwin/Bot/Archiveerlinks'
    # What projects should be checked?
    projects = {'wikipedia': ['nl']}

    # Per-family/per-language page listing the links to archive.
    linkingPageTitles = {
        'wikipedia': {
            'nl': 'Gebruiker:Erwin/Bot/Archiveerlinks'
        },
        'wikisource': {
            'nl': 'Gebruiker:Erwin85/Bot/Archiveerlinks'
        }
    }
    for arg in wikipedia.handleArgs():
        if arg == '-always':
            acceptall = True

        # Override defined projects
        # Use: -project:family:code
        elif arg.startswith('-project'):
            if len(arg) == 8:
                project = [
                    wikipedia.input(u'Family?'),
                    wikipedia.input(u'Code?')
                ]
            else:
                project = re.split(r'\:', arg[9:])
            projects = {project[0]: [project[1]]}
        elif arg == '-test':
            test = True
            wikipedia.output(u'Using test settings.')
            projects = {'wikipedia': ['nl']}

    # .items() instead of the Python-2-only .iteritems() keeps this
    # loop working unchanged on both Python 2 and 3.
    for family, langs in projects.items():
        for lang in langs:
            if not test:
                linkingPageTitle = linkingPageTitles[family][lang]
            else:
                linkingPageTitle = 'Gebruiker:Erwin/Bot/Archiveerlinkstest'

            wikipedia.output(u'\n>> %s:%s<<\n' % (family, lang))
            referredPage = wikipedia.Page(
                wikipedia.getSite(code=lang, fam=family), linkingPageTitle)
            gen = pagegenerators.ReferringPageGenerator(referredPage)
            preloadingGen = pagegenerators.PreloadingGenerator(gen,
                                                               pageNumber=40)
            bot = ArchivingRobot(preloadingGen, time.time(),
                                 wikipedia.getSite(code=lang, fam=family),
                                 linkingPageTitle, acceptall)
            bot.run()
Example #13
0
 def countRefs(self, templates, namespaces):
     """Print the number of transclusions of each given template."""
     mysite = wikipedia.getSite()
     prefix = mysite.template_namespace() + ':'
     report = [u'Number of transclusions per template',
               u'------------------------------------']
     for template in templates:
         gen = pagegenerators.ReferringPageGenerator(
             wikipedia.Page(mysite, prefix + template),
             onlyTemplateInclusion=True)
         if namespaces:
             gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
         # Consume the generator, counting as we go.
         count = sum(1 for _ in gen)
         report.append(u'%s: %d' % (template, count))
     for line in report:
         wikipedia.output(line)
def crawlerCatLink(pagename, apres):
    """Run modification() on main-namespace pages linking to the
    members of category *pagename*.

    If *apres* is non-empty, linked pages are skipped until the page
    titled *apres* has been seen; processing starts after it.
    """
    # Real boolean instead of the former u'False'/u'True' string sentinel.
    started = False
    cat = catlib.Category(site, pagename)
    pages = cat.articlesList(False)
    for member in pagegenerators.PreloadingGenerator(pages, 100):
        page = wikipedia.Page(site, member.title())
        gen = pagegenerators.ReferringPageGenerator(page)
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
        for linked in pagegenerators.PreloadingGenerator(gen, 100):
            if not apres or apres == u'' or started:
                modification(linked.title())  #crawlerLink(Page.title())
            elif linked.title() == apres:
                started = True
Example #15
0
def findimages():
    """Find file pages transcluding Template:Commons ok on en.wikipedia
    and hand them to moveimage() (presumably a transfer to Commons —
    confirm against moveimage's definition).
    """
    wikien = wikipedia.getSite(code='en', fam='wikipedia')
    # NOTE(review): 'commons' is assigned but never used in this function.
    commons = wikipedia.getSite(code='commons', fam='commons')
    transclusionPage = wikipedia.Page(wikien, 'Template:Commons ok')
    gen = pagegenerators.ReferringPageGenerator(transclusionPage,
                                                onlyTemplateInclusion=True)
    #	category = catlib.Category(wikien, 'Copy to Wikimedia Commons')
    #	gen = pagegenerators.CategorizedPageGenerator(category, recurse=True)
    for page in gen:
        # Namespace 6 is the File/Image namespace.
        if page.namespace() == 6:
            print page
            moveimage(page)
        else:
            print '%s is not in the image namespace.' % (str(page))
Example #16
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    summary = None
    generator = None
    always = False
    # Restrict -checkcurrent to the category namespace.
    ns = [14]

    # Parser for the page-selection options shared with other scripts.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-summary'):
            summary = (pywikibot.input(u'What summary do you want to use?')
                       if len(arg) == 8 else arg[9:])
        elif arg.startswith('-checkcurrent'):
            primaryCommonscat, commonscatAlternatives = \
                CommonscatBot.getCommonscatTemplate(
                    pywikibot.getSite().language())
            refGen = pagegenerators.ReferringPageGenerator(
                pywikibot.Page(pywikibot.getSite(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(refGen, ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)

    if generator is None:
        generator = genFactory.getCombinedGenerator()

    if not generator:
        pywikibot.showHelp()
        return
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, always, summary)
    bot.run()
 def template_dict_generator(templates, namespaces):
     """Yield (template name, list of transcluding pages) pairs."""
     mysite = pywikibot.getSite()
     tpl_ns = mysite.getNamespaceIndex(mysite.template_namespace())
     for template in templates:
         gen = pg.ReferringPageGenerator(
             pywikibot.Page(mysite, template, defaultNamespace=tpl_ns),
             onlyTemplateInclusion=True)
         if namespaces:
             gen = pg.NamespaceFilterPageGenerator(gen, namespaces)
         # Materialize the generator so callers get a concrete list.
         yield template, list(gen)
Example #18
0
 def getPageGenerator(self):
     """Build the preloading generator of NowCommons candidate files."""
     if use_hash:
         return self.useHashGenerator()
     ncPages = [pywikibot.Page(self.site, title, defaultNamespace=10)
                for title in self.ncTemplates()]
     referrers = [pg.ReferringPageGenerator(t, followRedirects=True,
                                            onlyTemplateInclusion=True)
                  for t in ncPages]
     gen = pg.CombinedPageGenerator(referrers)
     gen = pg.NamespaceFilterPageGenerator(gen, [6])
     gen = pg.DuplicateFilterPageGenerator(gen)
     return pg.PreloadingGenerator(gen)
Example #19
0
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None
    generator = None
    checkcurrent = False
    always = False
    # Restrict -checkcurrent to the category namespace.
    ns = [14]
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary'):
            summary = (pywikibot.input(u'What summary do you want to use?')
                       if len(arg) == 8 else arg[9:])
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            primaryCommonscat, commonscatAlternatives = \
                               CommonscatBot.getCommonscatTemplate(
                                   pywikibot.getSite().language())
            refGen = pagegenerators.ReferringPageGenerator(
                pywikibot.Page(pywikibot.getSite(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(refGen, ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)

    if generator is None:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            u'You have to specify the generator you want to use for the script!'
        )

    bot = CommonscatBot(pagegenerators.PreloadingGenerator(generator),
                        always, summary)
    bot.run()
Example #20
0
    def __iter__(self):
        """Yield the pages referring to self.disambPage.

        Skips the whole batch (yields nothing) when fewer than
        self.minimum referring pages exist.
        """
        generator = pagegenerators.ReferringPageGenerator(
            self.disambPage,
            followRedirects=False,
            withTemplateInclusion=False)
        generator = pagegenerators.PageTitleFilterPageGenerator(
            generator, ignore_title)

        # Materialize so the pages can be counted before yielding
        # (list() instead of the redundant identity comprehension).
        refs = list(generator)

        if len(refs) < self.minimum:
            pywikibot.output(u"Found only %d pages to work on; skipping." %
                             len(refs))
            return
        pywikibot.output(u"Will work on %d pages." % len(refs))
        for ref in refs:
            yield ref
Example #21
0
def modification(PageHS):
    """Remove occurrences of the string *PageHS* from the pages that
    transclude template page *PageHS*, then save the changed pages.

    Only acts in the Template namespace (10), or on the bot's test page.
    """
    page = Page(site, PageHS)
    if page.exists():
        if page.namespace() != 10 and page.title(
        ) != u'Utilisateur:JackBot/test':
            return
        else:
            # NOTE(review): NouveauModele is only assigned when PageHS
            # contains '='; otherwise the comparison in the loop below
            # raises NameError (or reads a stale global) — confirm.
            if PageHS.find(u'=') != -1:
                NouveauModele = u'langue|' + PageHS[1:len(PageHS) - 1] + u'}}'
            gen = pagegenerators.ReferringPageGenerator(page, 0)
            for PageCourante in pagegenerators.PreloadingGenerator(gen, 100):
                print(PageCourante.title().encode(config.console_encoding,
                                                  'replace'))
                try:
                    PageBegin = PageCourante.get()
                except wikipedia.NoPage:
                    print "NoPage"
                    return
                except wikipedia.IsRedirectPage:
                    print "Redirect page"
                    return
                except wikipedia.LockedPage:
                    print "Locked/protected page"
                    return
                except wikipedia.ServerError:
                    print "ServerError"
                    return
                except wikipedia.NoSuchSite:
                    print "NoSuchSite"
                    return
                except wikipedia.InvalidTitle:
                    print "InvalidTitle"
                    return
                PageTemp = PageBegin
                # NOTE(review): the two branches look swapped — the
                # empty-NouveauModele branch splices NouveauModele in
                # (a no-op when empty) while the non-empty branch drops
                # it, so every occurrence of PageHS is simply deleted
                # either way. Confirm the intended replacement logic.
                while PageTemp.find(PageHS) != -1:
                    if NouveauModele == u'':
                        PageTemp = PageTemp[0:PageTemp.find(
                            PageHS)] + NouveauModele + PageTemp[
                                PageTemp.find(PageHS) +
                                len(PageHS):len(PageTemp)]
                    else:
                        PageTemp = PageTemp[0:PageTemp.find(
                            PageHS)] + PageTemp[PageTemp.find(PageHS) +
                                                len(PageHS):len(PageTemp)]
                # Save only when the text actually changed.
                if PageTemp != PageBegin:
                    sauvegarde(PageCourante.title(), PageTemp)
Example #22
0
def main():
    """Fix redirects on featured articles, a whole namespace, or the
    pages from a standard generator."""
    start = '!'
    featured = False
    namespace = None
    gen = None

    # Factory that handles the page-selection command line options
    # shared with other scripts.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg == '-featured':
            featured = True
        elif arg.startswith('-namespace'):
            raw = (wikipedia.input(u'Which namespace should be processed?')
                   if len(arg) == 10 else arg[11:])
            namespace = int(raw)
        else:
            genFactory.handleArg(arg)

    gen = genFactory.getCombinedGenerator()

    mysite = wikipedia.getSite()
    # The Dutch Wikipedia community forbids bot-fixing of redirects.
    if mysite.sitename() == 'wikipedia:nl':
        wikipedia.output(u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
        sys.exit()

    # NOTE(review): 'linktrail' is unused below; the call is kept as-is.
    linktrail = mysite.linktrail()
    if featured:
        featuredList = wikipedia.translate(mysite, featured_articles)
        refPage = wikipedia.Page(wikipedia.getSite(), featuredList)
        refGen = pagegenerators.ReferringPageGenerator(refPage)
        for page in pagegenerators.NamespaceFilterPageGenerator(refGen, [0]):
            workon(page)
    elif namespace is not None:
        allGen = pagegenerators.AllpagesPageGenerator(
            start=start, namespace=namespace, includeredirects=False)
        for page in allGen:
            workon(page)
    elif gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        wikipedia.showHelp('fixing_redirects')
Example #23
0
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None
    generator = None
    checkcurrent = False
    always = False
    # Restrict -checkcurrent to the category namespace.
    ns = [14]
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-summary'):
            summary = (wikipedia.input(u'What summary do you want to use?')
                       if len(arg) == 8 else arg[9:])
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(wikipedia.getSite().language())
            refGen = pagegenerators.ReferringPageGenerator(
                wikipedia.Page(wikipedia.getSite(),
                               u'Template:' + primaryCommonscat),
                onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(refGen, ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)

    if generator is None:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')

    for page in pagegenerators.PreloadingGenerator(generator):
        if not page.exists():
            wikipedia.output(u'Page %s does not exist. Skipping.' % page.aslink())
        elif page.isRedirectPage():
            wikipedia.output(u'Page %s is a redirect. Skipping.' % page.aslink())
        elif page.isCategoryRedirect():
            wikipedia.output(u'Page %s is a category redirect. Skipping.' % page.aslink())
        elif page.isDisambig():
            wikipedia.output(u'Page %s is a disambiguation. Skipping.' % page.aslink())
        else:
            (status, always) = addCommonscat(page, summary, always)
Example #24
0
 def createPageGenerator(self, firstPageTitle):
     """Return a preloading generator of misspelling pages.

     Uses the wiki's misspelling category when one is configured for
     the language, falling back to the misspelling template otherwise.
     """
     lang = pywikibot.getSite().lang
     if lang in self.misspellingCategory:
         catTitle = self.misspellingCategory[lang]
         cat = catlib.Category(pywikibot.getSite(), catTitle)
         generator = pagegenerators.CategorizedPageGenerator(
             cat, recurse=True, start=firstPageTitle)
     else:
         tplTitle = 'Template:%s' % self.misspellingTemplate[lang]
         tpl = pywikibot.Page(pywikibot.getSite(), tplTitle)
         generator = pagegenerators.ReferringPageGenerator(
             tpl, onlyTemplateInclusion=True)
         if firstPageTitle:
             pywikibot.output(
                 u'-start parameter unsupported on this wiki because there is no category for misspellings.')
     return pagegenerators.PreloadingGenerator(generator)
Example #25
0
def main():
    """Split out a big Commons category, or intersect categories tagged
    with Template:Intersect categories."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    bigcategory = u''
    target = u''
    generator = None

    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            title = (wikipedia.input(u'What page do you want to use?')
                     if len(arg) == 5 else arg[6:])
            generator = [wikipedia.Page(wikipedia.getSite(), title)]
        elif arg.startswith('-bigcat'):
            bigcategory = (wikipedia.input(
                u'What category do you want to split out?')
                if len(arg) == 7 else arg[8:])
        elif arg.startswith('-target'):
            target = (wikipedia.input(
                u'What category is the target category?')
                if len(arg) == 7 else arg[8:])

    if bigcategory != u'':
        splitOutCategory(bigcategory, target)
        return
    if generator is None:
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                wikipedia.Page(wikipedia.getSite(),
                               u'Template:Intersect categories'),
                onlyTemplateInclusion=True), [14])
    for cat in generator:
        intersectCategories(cat)
Example #26
0
    def generator(self):
        """Choose and return a page generator according to the options."""
        if self.__workonnew:
            if not self.__number:
                self.__number = config.special_page_limit
            return pagegenerators.NewpagesPageGenerator(number=self.__number)

        if self.__refpagetitle:
            refpage = wikipedia.Page(wikipedia.getSite(), self.__refpagetitle)
            return pagegenerators.ReferringPageGenerator(refpage)

        if self.__linkpagetitle:
            linkpage = wikipedia.Page(wikipedia.getSite(), self.__linkpagetitle)
            return pagegenerators.LinkedPageGenerator(linkpage)

        if self.__catname:
            cat = catlib.Category(wikipedia.getSite(),
                                  'Category:%s' % self.__catname)
            if self.__start:
                return pagegenerators.CategorizedPageGenerator(
                    cat, recurse=self.__catrecurse, start=self.__start)
            return pagegenerators.CategorizedPageGenerator(
                cat, recurse=self.__catrecurse)

        if self.__textfile:
            return pagegenerators.TextfilePageGenerator(self.__textfile)

        # Fall back to walking all pages from __start (default '!').
        if not self.__start:
            self.__start = '!'
        namespace = wikipedia.Page(wikipedia.getSite(), self.__start).namespace()
        start = wikipedia.Page(wikipedia.getSite(),
                               self.__start).titleWithoutNamespace()
        return pagegenerators.AllpagesPageGenerator(start, namespace)
Example #27
0
                action = 'pages'
            elif arg == ('categories'):
                action = 'categories'
            elif arg.startswith('-start:'):
                start = wikipedia.Page(wikipedia.getSite(), arg[7:])
                gen = pagegenerators.AllpagesPageGenerator(
                    start.titleWithoutNamespace(),
                    namespace=start.namespace(),
                    includeredirects=False)
            elif arg.startswith('-cat:'):
                cat = catlib.Category(wikipedia.getSite(),
                                      'Category:%s' % arg[5:])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            elif arg.startswith('-ref:'):
                ref = wikipedia.Page(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.ReferringPageGenerator(ref)
            elif arg.startswith('-link:'):
                link = wikipedia.Page(wikipedia.getSite(), arg[6:])
                gen = pagegenerators.LinkedPageGenerator(link)
            elif arg.startswith('-page:'):
                singlepage = wikipedia.Page(wikipedia.getSite(), arg[6:])
                gen = iter([singlepage])
            #else:
            #bug

        if action == 'pages':
            preloadingGen = pagegenerators.PreloadingGenerator(gen)
            bot = CommonsLinkBot(preloadingGen, acceptall=False)
            bot.pages()
        elif action == 'categories':
            preloadingGen = pagegenerators.PreloadingGenerator(gen)
Example #28
0
def main():
    """Parse command-line arguments and run the deletion/undeletion bot.

    Page-source options (mutually exclusive, last one wins):
      -page:X    delete the single page X
      -cat:X     delete all pages in category X (-nosubcats limits recursion)
      -links:X   delete all pages linked from page X
      -ref:X     delete all pages referring to page X
      -images:X  delete all images used on page X
      -file:X    delete all pages listed in text file X
    Other options:
      -summary:X  deletion reason (prompted for when missing)
      -always     do not ask for confirmation before each deletion
      -undelete   restore the selected pages instead of deleting them
    """
    pageName = ''
    summary = ''
    always = False
    doSinglePage = False
    doCategory = False
    deleteSubcategories = True
    doRef = False
    doLinks = False
    doImages = False
    undelete = False
    fileName = ''
    gen = None

    # Read command line parameters; options given without a value fall
    # back to an interactive prompt.
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-file'):
            if len(arg) == len('-file'):
                fileName = pywikibot.input(
                    u'Enter name of file to delete pages from:')
            else:
                fileName = arg[len('-file:'):]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Enter a reason for the deletion:')
            else:
                summary = arg[len('-summary:'):]
        elif arg.startswith('-cat'):
            doCategory = True
            if len(arg) == len('-cat'):
                pageName = pywikibot.input(
                    u'Enter the category to delete from:')
            else:
                pageName = arg[len('-cat:'):]
        elif arg.startswith('-nosubcats'):
            deleteSubcategories = False
        elif arg.startswith('-links'):
            doLinks = True
            if len(arg) == len('-links'):
                pageName = pywikibot.input(u'Enter the page to delete from:')
            else:
                pageName = arg[len('-links:'):]
        elif arg.startswith('-ref'):
            doRef = True
            if len(arg) == len('-ref'):
                pageName = pywikibot.input(u'Enter the page to delete from:')
            else:
                pageName = arg[len('-ref:'):]
        elif arg.startswith('-page'):
            doSinglePage = True
            if len(arg) == len('-page'):
                pageName = pywikibot.input(u'Enter the page to delete:')
            else:
                pageName = arg[len('-page:'):]
        elif arg.startswith('-images'):
            doImages = True
            if len(arg) == len('-images'):
                pageName = pywikibot.input(
                    u'Enter the page with the images to delete:')
            else:
                # BUG FIX: was arg[len('-images'):], which kept the leading
                # ':' in the page name; strip '-images:' like all other
                # branches do.
                pageName = arg[len('-images:'):]
        elif arg.startswith('-undelete'):
            undelete = True

    mysite = pywikibot.getSite()
    # Build the page generator matching the selected source; each branch
    # also supplies a translated default summary when none was given.
    if doSinglePage:
        if not summary:
            summary = pywikibot.input(u'Enter a reason for the deletion:')
        page = pywikibot.Page(mysite, pageName)
        gen = iter([page])
    elif doCategory:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_category) \
                      % pageName
        ns = mysite.category_namespace()
        categoryPage = catlib.Category(mysite, ns + ':' + pageName)
        gen = pagegenerators.CategorizedPageGenerator(
            categoryPage, recurse=deleteSubcategories)
    elif doLinks:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_links) % pageName
        pywikibot.setAction(summary)
        linksPage = pywikibot.Page(mysite, pageName)
        gen = pagegenerators.LinkedPageGenerator(linksPage)
    elif doRef:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_ref) % pageName
        refPage = pywikibot.Page(mysite, pageName)
        gen = pagegenerators.ReferringPageGenerator(refPage)
    elif fileName:
        if not summary:
            summary = pywikibot.translate(mysite, msg_simple_delete)
        gen = pagegenerators.TextfilePageGenerator(fileName)
    elif doImages:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_images)
        gen = pagegenerators.ImagesPageGenerator(
            pywikibot.Page(mysite, pageName))

    if gen:
        pywikibot.setAction(summary)
        # We are just deleting pages, so we have no need of using a preloading page generator
        # to actually get the text of those pages.
        bot = DeletionRobot(gen, summary, always, undelete)
        bot.run()
    else:
        pywikibot.showHelp(u'delete')
Exemple #29
0
def main(*args):
    """Parse command-line arguments and run the template-replacement bot.

    Non-option arguments are template names: pairs of (old, new) names,
    or single names when -subst/-remove is given.  Options:
      -remove         remove the template(s) instead of replacing
      -subst          substitute the template(s) instead of replacing
      -always         do not ask for confirmation before each edit
      -xml[:file]     load referring pages from an XML dump
      -namespace:N    restrict work to the given namespace(s)
      -category:X     append category X to edited pages
      -summary:X      custom edit summary
      -user:X         only edit pages last edited by user X
      -skipuser:X     skip pages last edited by user X
      -timestamp:X    timestamp bound used together with -user/-skipuser
    """
    templateNames = []
    templates = {}
    subst = False
    remove = False
    namespaces = []
    editSummary = ''
    addedCat = ''
    acceptAll = False
    genFactory = pagegenerators.GeneratorFactory()
    # If xmlfilename is None, references will be loaded from the live wiki.
    xmlfilename = None
    user = None
    skip = False
    timestamp = None
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-remove':
            remove = True
        elif arg == '-subst':
            subst = True
        elif arg == ('-always'):
            acceptAll = True
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlfilename = pywikibot.input(u'Please enter the XML dump\'s filename: ')
            else:
                xmlfilename = arg[5:]
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[len('-namespace:'):]))
            except ValueError:
                namespaces.append(arg[len('-namespace:'):])
        elif arg.startswith('-category:'):
            addedCat = arg[len('-category:'):]
        elif arg.startswith('-summary:'):
            editSummary = arg[len('-summary:'):]
        # NOTE(review): the next three branches were garbled in the source
        # (masked with '******' and collapsed onto one invalid line);
        # reconstructed from the variables they feed (user/skip/timestamp,
        # consumed by UserEditFilterGenerator below) per upstream template.py.
        elif arg.startswith('-user:'):
            user = arg[len('-user:'):]
        elif arg.startswith('-skipuser:'):
            user = arg[len('-skipuser:'):]
            skip = True
        elif arg.startswith('-timestamp:'):
            timestamp = arg[len('-timestamp:'):]
        else:
            if not genFactory.handleArg(arg):
                templateNames.append(pywikibot.Page(pywikibot.getSite(), arg, defaultNamespace=10).titleWithoutNamespace())

    # With -subst/-remove each template maps to None (no replacement);
    # otherwise names must come in (old, new) pairs.
    if subst or remove:
        for templateName in templateNames:
            templates[templateName] = None
    else:
        try:
            for i in range(0, len(templateNames), 2):
                templates[templateNames[i]] = templateNames[i + 1]
        except IndexError:
            pywikibot.output(u'Unless using -subst or -remove, you must give an even number of template names.')
            return

    oldTemplates = []
    for templateName in templates.keys():
        oldTemplate = pywikibot.Page(pywikibot.getSite(), templateName, defaultNamespace=10)
        oldTemplates.append(oldTemplate)

    # Prefer an XML dump, then any generator built from the factory args,
    # and finally fall back to live transclusion lists of the templates.
    if xmlfilename:
        gen = XmlDumpTemplatePageGenerator(oldTemplates, xmlfilename)
    else:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        gens = [pagegenerators.ReferringPageGenerator(t, onlyTemplateInclusion=True) for t in oldTemplates]
        gen = pagegenerators.CombinedPageGenerator(gens)
        gen = pagegenerators.DuplicateFilterPageGenerator(gen)

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    if user:
        gen = UserEditFilterGenerator(gen, user, timestamp, skip)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)

    bot = TemplateRobot(preloadingGen, templates, subst, remove, editSummary, acceptAll, addedCat)
    bot.run()
Exemple #30
0
import sys, os
sys.path.append("/home/rk/py/pywikibot/compat")

import time, re
import wikipedia as pywikibot
import pagegenerators
import logging

# output debug messages
DEBUG = False

# Work on the Estonian Wikipedia; all settlements are tagged with the
# {{EestiAsula}} infobox template.
site = pywikibot.Site("et", "wikipedia")
tpl = pywikibot.Page(site, u'Mall:EestiAsula')

# Iterate over every page that transcludes the template (not plain links,
# not redirects), preloading text in batches of 50 to cut HTTP round-trips.
gen = pagegenerators.ReferringPageGenerator(tpl,
                                            followRedirects=False,
                                            withTemplateInclusion=True,
                                            onlyTemplateInclusion=True)
generator = pagegenerators.PreloadingGenerator(gen, pageNumber=50)

# Wikidata claims to set on each settlement item: property id -> value
# (presumably Wikidata P-ids and Q-ids; only "country = Estonia" is
# currently active — TODO confirm against editclaim() usage below).
claims_rules = {
    ##### main type deprecated    107: 'q618123', # GDN = geo
    17: 'q191',  # Country = Estonia

    #TODO: is in the administrative unit
    #TODO: administrative unit type

    #instance of << town Q3957 << alev << alevik << village Q532
}


def editclaim(article, pid, value):