def countTemplates(self, templates, namespaces):
    """Count how many pages transclude each template and print a report.

    @param templates: iterable of template names (without namespace prefix)
    @param namespaces: namespace numbers to restrict to; empty means all
    @return: dict mapping template name -> number of transclusions
    """
    site = wikipedia.getSite()
    prefix = site.template_namespace() + ':'
    report = [u'Number of transclusions per template',
              u'------------------------------------']
    # Template names are the keys; transclusion counts are the values.
    counts = {}
    grand_total = 0
    for name in templates:
        referrers = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(site, prefix + name),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pagegenerators.NamespaceFilterPageGenerator(
                referrers, namespaces)
        # Exhaust the generator, counting pages as they stream by.
        num = sum(1 for _ in referrers)
        counts[name] = num
        report.append(u'%s: %d' % (name, num))
        grand_total += num
    for line in report:
        wikipedia.output(line, toStdout=True)
    wikipedia.output(u'TOTAL: %d' % grand_total, toStdout=True)
    wikipedia.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(),
                     toStdout=True)
    return counts
def main():
    """Parse command-line args, build a page generator and run the bot.

    Falls back to the per-wiki maintenance category when no generator or
    page title was given on the command line.
    """
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        # Was a bare `except:`, which silently hid every error (even
        # NameError/KeyboardInterrupt); only a missing mapping is expected.
        except KeyError:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site,
                                  "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def main(): global mysite, linktrail, page start = [] for arg in wikipedia.handleArgs(): start.append(arg) if start: start = " ".join(start) else: start = "!" mysite = wikipedia.getSite() linktrail = mysite.linktrail() try: generator = pagegenerators.CategorizedPageGenerator( mysite.disambcategory(), start=start) except wikipedia.NoPage: print "The bot does not know the disambiguation category for your wiki." raise # only work on articles generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0]) generator = pagegenerators.PreloadingGenerator(generator) pagestodo = [] pagestoload = [] for page in generator: if page.isRedirectPage(): continue linked = page.linkedPages() pagestodo.append((page, linked)) pagestoload += linked if len(pagestoload) > 49: wikipedia.getall(mysite, pagestoload) for page, links in pagestodo: workon(page, links) pagestoload = [] pagestodo = []
def listTemplates(self, templates, namespaces):
    """List every page transcluding each template and print a report.

    @param templates: iterable of template names (without namespace prefix)
    @param namespaces: namespace numbers to restrict to; empty means all
    @return: dict mapping template name -> list of transcluding Pages
    """
    site = wikipedia.getSite()
    total = 0
    # Template names are the keys; lists of transcluding pages the values.
    pages_by_template = {}
    report = [u'List of pages transcluding templates:']
    report.extend(u'* %s' % name for name in templates)
    report.append(u'------------------------------------')
    for name in templates:
        transcluders = []
        referrers = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(site, site.template_namespace() + ':' + name),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pagegenerators.NamespaceFilterPageGenerator(
                referrers, namespaces)
        for transcluder in referrers:
            report.append(u'%s' % transcluder.title())
            total += 1
            transcluders.append(transcluder)
        pages_by_template[name] = transcluders
    report.append(u'Total page count: %d' % total)
    for line in report:
        wikipedia.output(line, toStdout=True)
    wikipedia.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(),
                     toStdout=True)
    return pages_by_template
def __init__(self, pageToUnlink, namespaces, always):
    """Set up the unlink bot.

    @param pageToUnlink: the page whose incoming wiki-links will be removed
    @param namespaces: list of namespace numbers to restrict the referring
        pages to; [] means all namespaces
    @param always: if True, never ask before changing a page
    """
    self.pageToUnlink = pageToUnlink
    gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    self.generator = pagegenerators.PreloadingGenerator(gen)
    linktrail = pywikibot.getSite().linktrail()
    # The regular expression which finds links. Results consist of four
    # groups:
    #
    # group title is the target page title, that is, everything
    # before | or ].
    #
    # group section is the page section.
    # It'll include the # to make life easier for us.
    #
    # group label is the alternative link title, that's everything
    # between | and ].
    #
    # group linktrail is the link trail, that's letters after ]] which are
    # part of the word.
    # note that the definition of 'letter' varies from language to language.
    self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
    self.always = always
    # Set to True once the user aborts; checked by the run loop.
    self.done = False
    # Localized edit summary for the unlinking edits.
    self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                    self.pageToUnlink.title())
def main():
    """Locate and geo-tag Rijksmonument images on Wikimedia Commons."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # Connect database, we need that
    conn = cursor = None
    (conn, cursor) = connectDatabase()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        # No generator on the command line: fall back to the images that
        # still lack a location template.
        generator = getRijksmonumentWithoutLocation()
    # Preload, restricted to the File namespace (6) so we only see images.
    image_gen = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for image in image_gen:
        template = locateImage(image, conn, cursor)
        if template:
            addLocation(image, template)
def main():
    """Fix redirect links, either on featured articles or generated pages."""
    featured = False
    gen = None
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-featured':
            featured = True
        else:
            genFactory.handleArg(arg)
    mysite = pywikibot.getSite()
    # The Dutch Wikipedia forbids this kind of bot edit entirely.
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}'
        )
        sys.exit()
    if featured:
        featuredList = pywikibot.translate(mysite, featured_articles)
        ref = pywikibot.Page(pywikibot.getSite(), featuredList)
        gen = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(ref), [0])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('fixing_redirects')
        return
    for page in pagegenerators.PreloadingGenerator(gen):
        workon(page)
def main():
    """Parse command-line args, build a page generator and run SelflinkBot."""
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    always = False
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare '-xml' given: prompt for the dump filename.
                xmlFilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpSelflinkPageGenerator(xmlFilename)
        elif arg == '-sql':
            # NOT WORKING YET
            query = """
SELECT page_namespace, page_title
FROM page JOIN pagelinks JOIN text ON (page_id = pl_from AND page_id = old_id)
WHERE pl_title = page_title
AND pl_namespace = page_namespace
AND page_namespace = 0
AND (old_text LIKE concat('%[[', page_title, ']]%')
    OR old_text LIKE concat('%[[', page_title, '|%'))
LIMIT 100"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        # A single page title was given as loose command-line words.
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('selflink')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = SelflinkBot(preloadingGen, always)
        bot.run()
def generateTransclusions(Site, template, namespaces=None):
    """Yield the pages transcluding the given template.

    @param Site: the wiki site to query
    @param template: title of the template page (defaults to namespace 10)
    @param namespaces: optional list of namespace numbers to restrict the
        result to; None or [] means all namespaces

    Note: the default was a mutable list literal (``namespaces=[]``), a
    classic shared-default pitfall; ``None`` is the safe, equivalent default
    since the value is only tested for truthiness.
    """
    if namespaces is None:
        namespaces = []
    pywikibot.output(u'Fetching template transclusions...')
    transclusionPage = pywikibot.Page(Site, template, defaultNamespace=10)
    gen = pagegenerators.ReferringPageGenerator(transclusionPage,
                                                onlyTemplateInclusion=True)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces,
                                                          Site)
    for page in gen:
        yield page
def main():
    """Parse command-line args and run the bot on the selected pages.

    BUGFIX: ``template`` was initialised to ``None`` but the
    ``-template:`` branch called ``template.append(...)``, which raised
    AttributeError; it is now initialised to a list.  The misspelled
    user prompts ("chage") are also corrected.
    """
    summary_commandline, gen = None, None
    # Must be a list: the -template: branch appends to it.
    template = []
    exceptions, PageTitles, namespaces = [], [], []
    cat = ''
    autoText, autoTitle = False, False
    genFactory = pagegenerators.GeneratorFactory()
    arg = False  #------if you dont want to work with arguments leave it False if you want change it to True---
    if arg == False:
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith('-page:'):
                if len(arg) == 6:
                    PageTitles.append(wikipedia.input(
                        u'Which page do you want to change?'))
                else:
                    PageTitles.append(arg[6:])
            elif arg.startswith('-cat:'):
                if len(arg) == 5:
                    cat = wikipedia.input(
                        u'Which Category do you want to change?')
                else:
                    cat = 'Category:' + arg[5:]
            elif arg.startswith('-template:'):
                if len(arg) == 10:
                    template.append(wikipedia.input(
                        u'Which Template do you want to change?'))
                else:
                    template.append('Template:' + arg[10:])
            elif arg.startswith('-except:'):
                exceptions.append(arg[8:])
            elif arg.startswith('-namespace:'):
                namespaces.append(int(arg[11:]))
            elif arg.startswith('-ns:'):
                namespaces.append(int(arg[4:]))
            elif arg.startswith('-summary:'):
                wikipedia.setAction(arg[9:])
                summary_commandline = True
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
    else:
        PageTitles = [raw_input(u'Page:> ').decode('utf-8')]
    if cat != '':
        facatfalist = facatlist(cat)
        if facatfalist != False:
            run(facatfalist)
    if PageTitles:
        pages = [wikipedia.Page(faSite, PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)  #---number of pages that you want load at same time
    run(preloadingGen)
def main():
    """Parse command-line args, run add_text on the pages, update the cache.

    BUGFIX: when namespaces were given, the generator was wrapped in
    PreloadingGenerator *before* NamespaceFilterPageGenerator, so pages
    outside the target namespaces were fetched from the wiki and then
    discarded.  Filtering now happens first, matching every other script
    in this codebase.
    """
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat = ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = wikipedia.input(
                    u'At which page do you want to start?')
            firstPageTitle = wikipedia.Page(
                fasite, firstPageTitle).title(withNamespace=False)
            gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, 0,
                                                       includeredirects=True)
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            wikipedia.setAction(arg[9:])
            summary_commandline = True
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    if not gen:
        wikipedia.stopme()
        sys.exit()
    # Filter first, then preload: only pages we will actually process
    # get fetched from the wiki.
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    _cache, last_timestamp = get_cache()
    add_text(preloadingGen)
    # Crude day number (month=30 days, year=365) used to age out the cache.
    now = str(datetime.now())
    todaynum = int(now.split('-')[2].split(' ')[0]) + int(
        now.split('-')[1]) * 30 + (int(now.split('-')[0]) - 2000) * 365
    if last_timestamp + 3 < todaynum:
        put_cache(_cache, todaynum)
    else:
        put_cache({}, 0)
def getPageGenerator(self):
    """Return the page generator: hash-based when use_hash is set,
    otherwise files (namespace 6) transcluding the NowCommons template."""
    if use_hash:
        return self.useHashGenerator()
    referring = pagegenerators.ReferringPageGenerator(
        self.nowCommonsTemplate,
        followRedirects=True,
        onlyTemplateInclusion=True)
    return pagegenerators.NamespaceFilterPageGenerator(referring, [6])
def crawlerLink(pagename, apres):
    """Run modification() on every main-namespace page linking to pagename.

    If *apres* is given, pages are skipped until the page titled *apres*
    has been encountered; modification starts from the next page on.
    """
    started = False
    #pagename = unicode(arg[len('-links:'):], 'utf-8')
    target = wikipedia.Page(site, pagename)
    referrers = pagegenerators.NamespaceFilterPageGenerator(
        pagegenerators.ReferringPageGenerator(target), [0])
    for linking in pagegenerators.PreloadingGenerator(referrers, 100):
        #print(linking.title().encode(config.console_encoding, 'replace'))
        if not apres or apres == u'' or started:
            modification(linking.title())
            #crawlerLink(linking.title())
        elif linking.title() == apres:
            started = True
def main(): """ Process command line arguments and invoke bot. """ #page generator gen = None # This temporary array is used to read the page title if one single # page to work on is specified by the arguments. pageTitle = [] # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() # Never ask before changing a page always = False to13 = False format = False for arg in pywikibot.handleArgs(): if arg.startswith('-namespace:'): try: namespaces.append(int(arg[11:])) except ValueError: namespaces.append(arg[11:]) elif arg == '-always': always = True elif arg == '-to13': to13 = True elif arg == '-format': format = True else: if not genFactory.handleArg(arg): pageTitle.append(arg) site = pywikibot.getSite() if pageTitle: gen = iter([pywikibot.Page(site, t) for t in pageTitle]) if not gen: gen = genFactory.getCombinedGenerator() if not gen: pywikibot.showHelp('isbn') else: if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = IsbnBot(preloadingGen, to13=to13, format=format, always=always) bot.run()
def countRefs(self, templates, namespaces):
    """Print a report with the number of transclusions of each template.

    @param templates: iterable of template names (without namespace prefix)
    @param namespaces: namespace numbers to restrict to; empty means all
    """
    site = wikipedia.getSite()
    prefix = site.template_namespace() + ':'
    report = [u'Number of transclusions per template',
              u'------------------------------------']
    for name in templates:
        referrers = pagegenerators.ReferringPageGenerator(
            wikipedia.Page(site, prefix + name),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pagegenerators.NamespaceFilterPageGenerator(
                referrers, namespaces)
        # Exhaust the generator, counting pages as they stream by.
        total = sum(1 for _ in referrers)
        report.append(u'%s: %d' % (name, total))
    for line in report:
        wikipedia.output(line)
def crawlerCatLink(pagename, apres):
    """For every article of a category, run modification() on all
    main-namespace pages linking to it.

    If *apres* is given, linked pages are skipped until the page titled
    *apres* has been encountered; modification starts from the next one.
    """
    started = False
    category = catlib.Category(site, pagename)
    members = category.articlesList(False)
    for member in pagegenerators.PreloadingGenerator(members, 100):
        article = wikipedia.Page(site, member.title())
        referrers = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(article), [0])
        for linking in pagegenerators.PreloadingGenerator(referrers, 100):
            #print(linking.title().encode(config.console_encoding, 'replace'))
            if not apres or apres == u'' or started:
                modification(linking.title())
                #crawlerLink(linking.title())
            elif linking.title() == apres:
                started = True
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    summary = None
    generator = None
    always = False
    # Restrict -checkcurrent to the Category namespace (14).
    ns = []
    ns.append(14)
    # Process global args and prepare generator args parser
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary' given: prompt the user.
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            # Work on all categories that transclude the local variant of
            # the {{Commonscat}} template.
            primaryCommonscat, commonscatAlternatives = \
                CommonscatBot.getCommonscatTemplate(
                    pywikibot.getSite().language())
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    pywikibot.Page(pywikibot.getSite(),
                                   u'Template:' + primaryCommonscat),
                    onlyTemplateInclusion=True), ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if generator:
        pregenerator = pagegenerators.PreloadingGenerator(generator)
        bot = CommonscatBot(pregenerator, always, summary)
        bot.run()
    else:
        pywikibot.showHelp()
def template_dict_generator(templates, namespaces):
    """Yield (template, pages) pairs.

    For each template name, *pages* is the list of pages transcluding it,
    optionally restricted to the given namespaces.
    """
    site = pywikibot.getSite()
    # The names of the templates are the keys, and lists of pages
    # transcluding templates are the values.
    tmpl_ns = site.getNamespaceIndex(site.template_namespace())
    for name in templates:
        referrers = pg.ReferringPageGenerator(
            pywikibot.Page(site, name, defaultNamespace=tmpl_ns),
            onlyTemplateInclusion=True)
        if namespaces:
            referrers = pg.NamespaceFilterPageGenerator(referrers, namespaces)
        yield name, list(referrers)
def getPageGenerator(self):
    """Return the page generator: hash-based when use_hash is set,
    otherwise a preloaded, de-duplicated stream of files (namespace 6)
    transcluding any of the NowCommons templates."""
    if use_hash:
        return self.useHashGenerator()
    template_pages = [pywikibot.Page(self.site, title, defaultNamespace=10)
                      for title in self.ncTemplates()]
    referrer_gens = [pg.ReferringPageGenerator(t, followRedirects=True,
                                               onlyTemplateInclusion=True)
                     for t in template_pages]
    combined = pg.CombinedPageGenerator(referrer_gens)
    combined = pg.NamespaceFilterPageGenerator(combined, [6])
    # A file may transclude several of the templates; report it once.
    combined = pg.DuplicateFilterPageGenerator(combined)
    return pg.PreloadingGenerator(combined)
def main():
    """Parse command-line args, build a page generator and run the bot."""
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare '-xml' given: prompt for the dump filename.
                xmlFilename = wikipedia.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        # A single page title was given as loose command-line words.
        page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        wikipedia.showHelp('noreferences')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def crawlerCat(category, recursif, apres):
    """Run modification() on the members of a category.

    If *apres* is given, pages are skipped until the page titled *apres*
    has been encountered.  With recursif=True, all pages of all
    subcategories are additionally processed (without the *apres* logic).
    """
    # String flag: becomes u'True' once the *apres* page has been seen.
    modifier = u'False'
    cat = catlib.Category(site, category)
    pages = cat.articlesList(False)
    # NOTE(review): `ns` is not defined in this function — presumably a
    # module-level global holding the namespace filter; confirm before use.
    gen = pagegenerators.NamespaceFilterPageGenerator(pages, [ns])
    for Page in pagegenerators.PreloadingGenerator(gen, 100):
        if not apres or apres == u'' or modifier == u'True':
            modification(Page.title())
            #crawlerLink(Page.title())
        elif Page.title() == apres:
            modifier = u'True'
    if recursif == True:
        subcat = cat.subcategories(recurse=True)
        for subcategory in subcat:
            pages = subcategory.articlesList(False)
            for Page in pagegenerators.PreloadingGenerator(pages, 100):
                modification(Page.title())
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None
    generator = None
    checkcurrent = False
    always = False
    # Restrict -checkcurrent to the Category namespace (14).
    ns = []
    ns.append(14)
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary' given: prompt the user.
                summary = pywikibot.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            # Work on all categories that transclude the local variant of
            # the {{Commonscat}} template.
            primaryCommonscat, commonscatAlternatives = \
                CommonscatBot.getCommonscatTemplate(
                    pywikibot.getSite().language())
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    pywikibot.Page(pywikibot.getSite(),
                                   u'Template:' + primaryCommonscat),
                    onlyTemplateInclusion=True), ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            u'You have to specify the generator you want to use for the script!'
        )
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    bot = CommonscatBot(pregenerator, always, summary)
    bot.run()
def main():
    """Fix redirect links on featured articles, a namespace, or a generator."""
    start = '!'
    featured = False
    namespace = None
    gen = None
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-featured':
            featured = True
        elif arg.startswith('-namespace'):
            if len(arg) == 10:
                # Bare '-namespace' given: prompt the user.
                namespace = int(wikipedia.input(
                    u'Which namespace should be processed?'))
            else:
                namespace = int(arg[11:])
        else:
            genFactory.handleArg(arg)
    gen = genFactory.getCombinedGenerator()
    mysite = wikipedia.getSite()
    # The Dutch Wikipedia forbids this kind of bot edit entirely.
    if mysite.sitename() == 'wikipedia:nl':
        wikipedia.output(u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
        sys.exit()
    linktrail = mysite.linktrail()
    if featured:
        featuredList = wikipedia.translate(mysite, featured_articles)
        ref = wikipedia.Page(wikipedia.getSite(), featuredList)
        gen = pagegenerators.ReferringPageGenerator(ref)
        # only work on articles
        generator = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
        for page in generator:
            workon(page)
    elif namespace is not None:
        for page in pagegenerators.AllpagesPageGenerator(
                start=start, namespace=namespace, includeredirects=False):
            workon(page)
    elif gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        wikipedia.showHelp('fixing_redirects')
def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = None; generator = None; checkcurrent = False; always = False
    # Restrict -checkcurrent to the Category namespace (14).
    ns = []
    ns.append(14)
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-summary'):
            if len(arg) == 8:
                # Bare '-summary' given: prompt the user.
                summary = wikipedia.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            # Work on all categories that transclude the local variant of
            # the {{Commonscat}} template.
            primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(wikipedia.getSite().language())
            generator = pagegenerators.NamespaceFilterPageGenerator(pagegenerators.ReferringPageGenerator(wikipedia.Page(wikipedia.getSite(), u'Template:' + primaryCommonscat), onlyTemplateInclusion=True), ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    for page in pregenerator:
        # Skip anything that cannot meaningfully carry a {{Commonscat}}.
        if not page.exists():
            wikipedia.output(u'Page %s does not exist. Skipping.' % page.aslink())
        elif page.isRedirectPage():
            wikipedia.output(u'Page %s is a redirect. Skipping.' % page.aslink())
        elif page.isCategoryRedirect():
            wikipedia.output(u'Page %s is a category redirect. Skipping.' % page.aslink())
        elif page.isDisambig():
            wikipedia.output(u'Page %s is a disambiguation. Skipping.' % page.aslink())
        else:
            # addCommonscat may flip `always` when the user chooses "always".
            (status, always) = addCommonscat(page, summary, always)
def main():
    """Either split out one big category, or intersect the categories of
    every category transcluding {{Intersect categories}} on Commons."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    bigcategory = u''
    target = u''
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                # Bare '-page' given: prompt the user.
                generator = [
                    wikipedia.Page(
                        wikipedia.getSite(),
                        wikipedia.input(u'What page do you want to use?'))
                ]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(
                    u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(
                    u'What category is the target category?')
            else:
                target = arg[8:]
    if not bigcategory == u'':
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            # Default: all categories (namespace 14) transcluding the
            # {{Intersect categories}} template.
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(),
                                   u'Template:Intersect categories'),
                    onlyTemplateInclusion=True), [14])
        for cat in generator:
            intersectCategories(cat)
def main():
    """Download every image produced by the command-line page generator
    from Wikimedia Commons into the target directory."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    genFactory = pagegenerators.GeneratorFactory()
    target = u'/Users/hay/tmp/wlm/'
    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        return
    # Get a preloading generator with only images
    image_gen = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for image in image_gen:
        imagepage = wikipedia.ImagePage(image.site(), image.title())
        downloadFile(imagepage, target)
def main():
    """Categorize the Commons images produced by the command-line
    page generator, using the monuments database connection."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # Connect database, we need that
    conn = cursor = None
    (conn, cursor) = connectDatabase()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        return
    # Get a preloading generator with only images
    image_gen = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for image in image_gen:
        categorizeImage(image, conn, cursor)
def main(*args):
    """Parse command-line args and run TemplateRobot on the pages that
    transclude the given templates (replace, subst or remove them)."""
    templateNames = []
    # Maps old template name -> replacement name (None for subst/remove).
    templates = {}
    subst = False
    remove = False
    namespaces = []
    editSummary = ''
    addedCat = ''
    acceptAll = False
    genFactory = pagegenerators.GeneratorFactory()
    # If xmlfilename is None, references will be loaded from the live wiki.
    xmlfilename = None
    user = None
    skip = False
    timestamp = None
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-remove':
            remove = True
        elif arg == '-subst':
            subst = True
        elif arg == ('-always'):
            acceptAll = True
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                # Bare '-xml' given: prompt for the dump filename.
                xmlfilename = pywikibot.input(u'Please enter the XML dump\'s filename: ')
            else:
                xmlfilename = arg[5:]
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[len('-namespace:'):]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[len('-namespace:'):])
        elif arg.startswith('-category:'):
            addedCat = arg[len('-category:'):]
        elif arg.startswith('-summary:'):
            editSummary = arg[len('-summary:'):]
        # NOTE(review): the following line appears corrupted/redacted in the
        # source (the ****** runs). It most likely was three separate
        # branches handling -user:, -skipuser: and -timestamp:; restore from
        # the upstream template.py before running this script.
        elif arg.startswith('-user:'******'-user:'******'-skipuser:'******'-skipuser:'******'-timestamp:'):
            timestamp = arg[len('-timestamp:'):]
        else:
            if not genFactory.handleArg(arg):
                templateNames.append(pywikibot.Page(pywikibot.getSite(), arg, defaultNamespace=10).titleWithoutNamespace())
    if subst or remove:
        # No replacement targets needed; every template maps to None.
        for templateName in templateNames:
            templates[templateName] = None
    else:
        # Templates come in (old, new) pairs on the command line.
        try:
            for i in range(0, len(templateNames), 2):
                templates[templateNames[i]] = templateNames[i + 1]
        except IndexError:
            pywikibot.output(u'Unless using -subst or -remove, you must give an even number of template names.')
            return
    oldTemplates = []
    ns = pywikibot.getSite().template_namespace()
    for templateName in templates.keys():
        oldTemplate = pywikibot.Page(pywikibot.getSite(), templateName, defaultNamespace=10)
        oldTemplates.append(oldTemplate)
    if xmlfilename:
        gen = XmlDumpTemplatePageGenerator(oldTemplates, xmlfilename)
    else:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # Default: all pages transcluding any of the old templates,
        # de-duplicated (a page may use several of them).
        gens = []
        gens = [pagegenerators.ReferringPageGenerator(t, onlyTemplateInclusion = True) for t
                in oldTemplates]
        gen = pagegenerators.CombinedPageGenerator(gens)
        gen = pagegenerators.DuplicateFilterPageGenerator(gen)
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    if user:
        gen = UserEditFilterGenerator(gen, user, timestamp, skip)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = TemplateRobot(preloadingGen, templates, subst, remove, editSummary, acceptAll, addedCat)
    bot.run()
def main():
    """Parse command-line args and run the weblink checker, waiting for
    the background link-check threads to finish on shutdown."""
    gen = None
    singlePageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # HTTP status codes that should not be reported as dead links.
    HTTPignore = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    global day
    day = 7
    for arg in pywikibot.handleArgs():
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Non-numeric namespace names are passed through verbatim.
                namespaces.append(arg[11:])
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            day = int(arg[5:])
        else:
            if not genFactory.handleArg(arg):
                singlePageTitle.append(arg)
    if singlePageTitle:
        # A single page title was given as loose command-line words.
        singlePageTitle = ' '.join(singlePageTitle)
        page = pywikibot.Page(pywikibot.getSite(), singlePageTitle)
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # fetch at least 240 pages simultaneously from the wiki, but more if
        # a high thread number is set.
        pageNumber = max(240, config.max_external_links * 2)
        gen = pagegenerators.PreloadingGenerator(gen, pageNumber = pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore)
        try:
            bot.run()
        finally:
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output(
                        u"Waiting for remaining %i threads to finish, please wait..."
                        % countLinkCheckThreads())
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output(u'Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output(u'Remaining %i threads will be killed.'
                                 % countLinkCheckThreads())
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output(u'Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output(u'Saving history...')
            bot.history.save()
    else:
        pywikibot.showHelp()
def main(): quietMode = False # use -quiet to get less output # if the -file argument is used, page titles are stored in this array. # otherwise it will only contain one page. articles = [] # if -file is not used, this temporary array is used to read the page title. page_title = [] # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] xmlfilename = None gen = None # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() for arg in pywikibot.handleArgs(): if arg.startswith('-xml'): if len(arg) == 4: xmlfilename = pywikibot.input( u'Please enter the XML dump\'s filename:') else: xmlfilename = arg[5:] gen = TableXmlDumpPageGenerator(xmlfilename) elif arg == '-sql': query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) WHERE old_text LIKE '%<table%' LIMIT 200""" gen = pagegenerators.MySQLPageGenerator(query) elif arg.startswith('-namespace:'): try: namespaces.append(int(arg[11:])) except ValueError: namespaces.append(arg[11:]) elif arg.startswith('-skip:'): articles = articles[articles.index(arg[6:]):] elif arg.startswith('-auto'): config.table2wikiAskOnlyWarnings = True config.table2wikiSkipWarnings = True print "Automatic mode!\n" elif arg.startswith('-quiet'): quietMode = True else: if not genFactory.handleArg(arg): page_title.append(arg) # if the page is given as a command line argument, # connect the title's parts with spaces if page_title != []: page_title = ' '.join(page_title) page = pywikibot.Page(pywikibot.getSite(), page_title) gen = iter([page]) if not gen: gen = genFactory.getCombinedGenerator() if gen: if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = Table2WikiRobot(preloadingGen, quietMode) bot.run() else: 
pywikibot.showHelp('table2wiki')