def main():
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Connect to the database; locateImage() needs the connection and cursor
    (conn, cursor) = connectDatabase()

    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()

    if not generator:
        generator = getRijksmonumentWithoutLocation()

    # Get a preloading generator with only images
    pgenerator = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for page in pgenerator:
        locationTemplate = locateImage(page, conn, cursor)
        if locationTemplate:
            addLocation(page, locationTemplate)
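
For reference, here is the recurring pattern from these examples in isolation: a minimal sketch that assumes the old compat layout (wikipedia, pagegenerators and catlib as top-level modules) and uses a placeholder category name.

import catlib
import pagegenerators
import wikipedia

site = wikipedia.getSite()
cat = catlib.Category(site, u'Category:Example')  # placeholder category
gen = pagegenerators.CategorizedPageGenerator(cat)
# PreloadingGenerator wraps any page generator and fetches the page texts
# in batches, so page.get() inside the loop needs no extra request.
for page in pagegenerators.PreloadingGenerator(gen, 60):
    wikipedia.output(page.title())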
Example 2
def main():
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    pageTitleParts = []

    for arg in pywikibot.handleArgs():
        if arg.startswith("-reg"):
            arg = '-transcludes:Infobox film'
        if not genFactory.handleArg(arg):
            pageTitleParts.append(arg)

    if pageTitleParts:
        # We will only work on a single page.
        pageTitle = ' '.join(pageTitleParts)
        page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        gen = iter([page])

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = FilmBannerBot(gen)
        bot.run()
    else:
        pywikibot.showHelp()
Example 3
def main():
    # The generator gives the pages that should be worked upon.
    gen = None
    # If debug is True, don't make any real changes; only show what
    # would have been changed.
    debug = False
    wantHelp = False

    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        else:
            wantHelp = True

    if not wantHelp:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.

        cat = catlib.Category(wikipedia.getSite(), 'Category:IP-Network')
        nets_gen = pagegenerators.CategorizedPageGenerator(cat,
                                                           start=None,
                                                           recurse=False)
        nets_gen = pagegenerators.PreloadingGenerator(nets_gen)

        bot = IpNetworkBot(nets_gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()
Example 4
def main(args):
    """ Grab a bunch of images and tag them if they are not categorized. """
    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg.startswith('-yesterday'):
            generator = uploadedYesterday(site)
        elif arg.startswith('-recentchanges'):
            generator = recentChanges(site=site, delay=120)
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        pywikibot.output(u'You have to specify the generator you want to use '
                         u'for the program!')
    else:
        pregenerator = pagegenerators.PreloadingGenerator(generator)
        for page in pregenerator:
            if page.exists() and (page.namespace() == 6) \
               and (not page.isRedirectPage()):
                if isUncat(page):
                    addUncat(page)
Example 5
def crawlerLink(pagename):
    #pagename = unicode(arg[len('-links:'):], 'utf-8')
    page = wikipedia.Page(site, pagename)
    gen = pagegenerators.ReferringPageGenerator(page)
    #gen =  pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    for Page in pagegenerators.PreloadingGenerator(gen, 100):
        modification(Page.title())
Example 6
def UpdateRepoCats(*args):
    # Get List of all articles in Category:All add-ons
    site = pywikibot.getSite()

    # Download all repos as soup element
    soups = importAllAddonXML()

    # Get all pages in Category All add-ons
    cat = catlib.Category(site, u'Category:All add-ons')
    pages = cat.articlesList(False)
    allRepoCats = repoCatList(site)

    for Page in pagegenerators.PreloadingGenerator(pages, 100):
        # Get addon_id via regexp
        addon_id = re.search(r"\|ID=([a-zA-Z0-9_\.\-]+)", Page.get())
        if not addon_id:
            pywikibot.output("Can't find addon_id for %s, skipping it..." %
                             Page.title())
            continue
        else:
            addon_id = addon_id.group(1)
            pywikibot.output("Identifying Repos for %s." % addon_id)
        # See if addon_id can be found in repos
        repos = checkInRepo(addon_id, soups)
        addRemoveRepoCats(Page, repos, allRepoCats)
Example 7
    def refreshGenerator(self):
        generator = pagegenerators.CategorizedPageGenerator(
            self.csdCat, start=self.savedProgress)
        # Wrap another generator around it so that we won't produce
        # orphaned talk pages.
        generator2 = pagegenerators.PageWithTalkPageGenerator(generator)
        self.preloadingGen = pagegenerators.PreloadingGenerator(generator2,
                                                                pageNumber=20)
Example 8
def main():
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitleParts = []

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith("-reg"):
            arg = '-cat:Unassessed film articles'
        if not genFactory.handleArg(arg):
            pageTitleParts.append(arg)

    if pageTitleParts:
        # We will only work on a single page.
        pageTitle = ' '.join(pageTitleParts)
        page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        gen = iter([page])

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(
            filmfunctions.PagesFromTalkPagesGenerator(gen))
        bot = FilmAssessBot(gen)
        bot.run()
    else:
        pywikibot.showHelp()
Example 9
def crawlerCat(category, recursif, apres):
    modifier = False
    cat = catlib.Category(site, category)
    pages = cat.articlesList(False)
    gen = pagegenerators.NamespaceFilterPageGenerator(pages, [ns])
    for Page in pagegenerators.PreloadingGenerator(gen, 100):
        if not apres or modifier:
            modification(Page.title())  #crawlerLink(Page.title())
        elif Page.title() == apres:
            modifier = True
    if recursif:
        subcat = cat.subcategories(recurse=True)
        for subcategory in subcat:
            pages = subcategory.articlesList(False)
            for Page in pagegenerators.PreloadingGenerator(pages, 100):
                modification(Page.title())
Example 10
def main():
    featured = False
    gen = None

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg == '-featured':
            featured = True
        else:
            genFactory.handleArg(arg)

    mysite = pywikibot.getSite()
    if mysite.sitename() == 'wikipedia:nl':
        pywikibot.output(
            u'\03{lightred}There is consensus on the Dutch Wikipedia that '
            u'bots should not be used to fix redirects.\03{default}')
        sys.exit()

    if featured:
        featuredList = pywikibot.translate(mysite, featured_articles)
        ref = pywikibot.Page(pywikibot.getSite(), featuredList)
        gen = pagegenerators.ReferringPageGenerator(ref)
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        for page in pagegenerators.PreloadingGenerator(gen):
            workon(page)
    else:
        pywikibot.showHelp('fixing_redirects')
Example 11
def main():
    oldImage = None
    newImage = None
    summary = ''
    always = False
    loose = False
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg == '-loose':
            loose = True
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
        else:
            if oldImage:
                newImage = arg
            else:
                oldImage = arg
    if not oldImage:
        pywikibot.showHelp('image')
    else:
        mysite = pywikibot.getSite()
        ns = mysite.image_namespace()
        oldImagePage = pywikibot.ImagePage(mysite, ns + ':' + oldImage)
        gen = pagegenerators.FileLinksGenerator(oldImagePage)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = ImageRobot(preloadingGen, oldImage, newImage, summary, always,
                         loose)
        bot.run()
Example 12
def main():

    site = wikipedia.getSite()
    gen = site.allpages()
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = SFRobot(preloadingGen)
    bot.run()
Example 13
def crawlerAll(start):
    gen = pagegenerators.AllpagesPageGenerator(start,
                                               namespace=0,
                                               includeredirects=False)
    for Page in pagegenerators.PreloadingGenerator(gen, 100):
        #print (Page.title().encode(config.console_encoding, 'replace'))
        modification(Page.title())
Example 14
def main():
    #page generator
    gen = None
    # If the user chooses to work on a single page, this temporary array is
    # used to read the words from the page title. The words will later be
    # joined with spaces to retrieve the full title.
    pageTitle = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if not genFactory.handleArg(arg):
            pageTitle.append(arg)

    if pageTitle:
        # work on a single page
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('inline_images')
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = InlineImagesRobot(preloadingGen)
        bot.run()
Example 15
def main():
    always = False
    for arg in wikipedia.handleArgs():
        if arg == '-always':
            always = True

    mysite = wikipedia.getSite()
    # If anything needs to be prepared, you can do it here
    template_image = wikipedia.translate(wikipedia.getSite(),
                                         template_to_the_image)
    template_user = wikipedia.translate(wikipedia.getSite(),
                                        template_to_the_user)
    except_text_translated = wikipedia.translate(wikipedia.getSite(),
                                                 except_text)
    basicgenerator = pagegenerators.UnusedFilesGenerator()
    generator = pagegenerators.PreloadingGenerator(basicgenerator)
    for page in generator:
        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                         page.title())
        if except_text_translated not in page.getImagePageHtml() and \
           'http://' not in page.get():
            wikipedia.output(u'\n' + page.title())
            appendtext(page, template_image, always)
            uploader = page.getFileVersionHistory().pop()[1]
            usertalkname = u'User Talk:%s' % uploader
            usertalkpage = wikipedia.Page(mysite, usertalkname)
            msg2uploader = template_user % page.title()
            appendtext(usertalkpage, msg2uploader, always)
Example 16
    def __init__(self, pageToUnlink, namespaces, always):
        self.pageToUnlink = pageToUnlink
        gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        self.generator = pagegenerators.PreloadingGenerator(gen)
        linktrail = pywikibot.getSite().linktrail()
        # The regular expression which finds links. Results consist of four
        # groups:
        #
        # group title is the target page title, that is, everything
        # before | or ].
        #
        # group section is the page section.
        # It'll include the # to make life easier for us.
        #
        # group label is the alternative link title, that's everything
        # between | and ].
        #
        # group linktrail is the link trail, that's letters after ]] which are
        # part of the word.
        # Note that the definition of 'letter' varies from language to language.
        self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
                                % linktrail)
        self.always = always
        self.done = False
        self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                        self.pageToUnlink.title())
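
A quick check of what the four named groups capture, using a hypothetical linktrail of [a-z]* (the real value comes from getSite().linktrail()):

import re

linktrail = r'[a-z]*'  # hypothetical; normally taken from the site
linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                   r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
                   % linktrail)
m = linkR.search(u'See [[Python (language)#History|the article]]s here.')
print m.group('title')      # Python (language)
print m.group('section')    # #History
print m.group('label')      # the article
print m.group('linktrail')  # s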
Example 17
def Manual_main():
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    gen = None
    word = u''
    PageTitles = []
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            PageTitles.append(arg[6:])
            break
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    if PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        wikipedia.stopme()
        sys.exit()
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    for faTitle in preloadingGen:
        wikipedia.output(u'---' + faTitle.title() + u'-----')
        OurResult = main(faTitle.title(), word)
        OurResult = Open_json(OurResult, faTitle.title())
        if OurResult.strip():
            wikipedia.output(OurResult)
            with codecs.open(BotAdress_main + u'zz_most_miss_result.txt',
                             mode='a', encoding='utf8') as f:
                f.write(OurResult.strip() + u'\n')
            with codecs.open(BotAdress_main + u'zz_most_miss_result_number.txt',
                             mode='a', encoding='utf8') as f:
                f.write(OurResult.split(u'@')[0].strip() + u'\n')
Example 18
def main():
    global mysite, linktrail, page
    start = []
    for arg in wikipedia.handleArgs():
        start.append(arg)
    if start:
        start = " ".join(start)
    else:
        start = "!"
    mysite = wikipedia.getSite()
    linktrail = mysite.linktrail()
    try:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    except wikipedia.NoPage:
        print "The bot does not know the disambiguation category for your wiki."
        raise
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        # Fetch the collected link targets in batches of 50 pages.
        if len(pagestoload) > 49:
            wikipedia.getall(mysite, pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []
Example 19
def main(*args):
    # Disable cosmetic changes because we don't want to modify any page
    # content, so that we don't flood the histories with minor changes.
    config.cosmetic_changes = False
    #page generator
    gen = None
    genFactory = pagegenerators.GeneratorFactory()
    redirs = False
    # If the user chooses to work on a single page, this temporary array is
    # used to read the words from the page title. The words will later be
    # joined with spaces to retrieve the full title.
    pageTitle = []
    for arg in pywikibot.handleArgs(*args):
        if genFactory.handleArg(arg):
            continue
        if arg == '-redir':
            redirs = True
        else:
            pageTitle.append(arg)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        if pageTitle:
            # work on a single page
            page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
            gen = iter([page])
        else:
            pywikibot.showHelp()
            return
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = TouchBot(preloadingGen, redirs)
    bot.run()
Example 20
def main():
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site,
                                  "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
Example 21
def main():
    global always
    always = False

    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True

    mysite = pywikibot.getSite()
    # If anything needs to be prepared, you can do it here
    template_image = pywikibot.translate(pywikibot.getSite(),
                                         template_to_the_image)
    template_user = pywikibot.translate(pywikibot.getSite(),
                                        template_to_the_user)
    except_text_translated = pywikibot.translate(pywikibot.getSite(),
                                                 except_text)
    basicgenerator = pagegenerators.UnusedFilesGenerator()
    generator = pagegenerators.PreloadingGenerator(basicgenerator)
    for page in generator:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                         page.title())
        if except_text_translated not in page.getImagePageHtml() and \
           'http://' not in page.get():
            pywikibot.output(u'\n' + page.title())
            if template_image in page.get():
                pywikibot.output(u"%s done already" % page.aslink())
                continue
            appendtext(page, u"\n\n" + template_image)
            uploader = page.getFileVersionHistory().pop()[1]
            usertalkname = u'User Talk:%s' % uploader
            usertalkpage = pywikibot.Page(mysite, usertalkname)
            msg2uploader = template_user % {'title': page.title()}
            appendtext(usertalkpage, msg2uploader)
Example 22
def Main():
    site = pywikibot.getSite()
    d = datetime.today()
    datestring = d.isoformat()
    zipfilename = "archive%s.zip" % datestring
    z = zipfile.ZipFile(zipfilename, "w")
    for x in ('Candidates_for_speedy_deletion_as_hoaxes',
              'Candidates_for_speedy_deletion_as_importance_or_significance_not_asserted',
              'Candidates_for_speedy_deletion_for_unspecified_reason'):
        cat = catlib.Category(site, x)
        pages = cat.articlesList(False)
        gen = pagegenerators.PreloadingGenerator(pages, 100)
        for Page in gen:
            outfile = "PAGES/%s.txt" % Page.urlname()
            text = Page.get()
            sutf8 = text.encode('UTF-8')
            print outfile
            z.writestr(outfile, sutf8)

        count = 0
        for strings in gen.data:
            for string in strings:
                for string2 in string:
                    count = count + 1
#                    sutf8 = string2.encode('UTF-8')
                    z.writestr("RawFiles/%s%d.xml" % (x, count), string2)

    z.close()
    push_zip(zipfilename)
Example 23
def main(args):
    pywikibot.output(u'WARNING: This is an experimental bot')
    pywikibot.output(
        u'WARNING: It will only work on self published work images')
    pywikibot.output(u'WARNING: This bot is still full of bugs')
    pywikibot.output(u'WARNING: Use at your own risk!')

    generator = None
    autonomous = False
    checkTemplate = True

    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg == '-nochecktemplate':
            checkTemplate = False
        elif arg == '-autonomous':
            autonomous = True
        else:
            genFactory.handleArg(arg)

    if not supportedSite():
        pywikibot.output(u'Sorry, this site is not supported (yet).')
        return False

    generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!'
        )

    pregenerator = pagegenerators.PreloadingGenerator(generator)

    prefetchQueue = Queue(maxsize=50)
    uploadQueue = Queue(maxsize=200)

    imageFetcherThread = imageFetcher(pregenerator, prefetchQueue)
    userInteractionThread = userInteraction(prefetchQueue, uploadQueue)
    uploaderThread = uploader(uploadQueue)

    imageFetcherThread.daemon = False
    userInteractionThread.daemon = False
    uploaderThread.daemon = False

    if autonomous:
        pywikibot.output(
            u'Bot is running in autonomous mode. There will be no user interaction.'
        )
        userInteractionThread.setAutonomous()

    if not checkTemplate:
        pywikibot.output(
            u'No check template will be added to the uploaded files.')
        uploaderThread.nochecktemplate()

    imageFetcherThread.start()
    userInteractionThread.start()
    uploaderThread.start()
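
The three threads above form a producer/consumer pipeline over two bounded queues. A minimal, self-contained sketch of that shape with the standard Queue module (stage names and the identity work functions are illustrative only):

import threading
from Queue import Queue

def stage(inq, outq, work):
    # Pull items until the upstream stage signals completion with None.
    while True:
        item = inq.get()
        if item is None:
            if outq is not None:
                outq.put(None)  # propagate the shutdown sentinel
            break
        result = work(item)
        if outq is not None:
            outq.put(result)

prefetchQueue = Queue(maxsize=50)
uploadQueue = Queue(maxsize=200)

fetch_thread = threading.Thread(target=stage,
                                args=(prefetchQueue, uploadQueue, lambda x: x))
upload_thread = threading.Thread(target=stage,
                                 args=(uploadQueue, None, lambda x: x))
fetch_thread.start()
upload_thread.start()

for i in range(10):      # producer: hand work to the first stage
    prefetchQueue.put(i)
prefetchQueue.put(None)  # sentinel: shut the pipeline down
fetch_thread.join()
upload_thread.join()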
Example 24
def main():
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    always = False

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpSelflinkPageGenerator(xmlFilename)
        elif arg == '-sql':
            # NOT WORKING YET
            query = """
SELECT page_namespace, page_title
FROM page JOIN pagelinks JOIN text ON (page_id = pl_from AND page_id = old_id)
WHERE pl_title = page_title
AND pl_namespace = page_namespace
AND page_namespace = 0
AND (old_text LIKE concat('%[[', page_title, ']]%')
    OR old_text LIKE concat('%[[', page_title, '|%'))
LIMIT 100"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('selflink')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = SelflinkBot(preloadingGen, always)
        bot.run()
Example 25
def crawlerCat(category):
    cat = catlib.Category(site, category)
    pages = cat.articlesList(False)
    for Page in pagegenerators.PreloadingGenerator(pages, 100):
        modification(Page.title())
    subcat = cat.subcategories(recurse=True)
    for subcategory in subcat:
        '''
		if subcategory == u'[[Catégorie:Mammifère disparu]]':
			raw_input("oui")
		else:
			raw_input("non")
		'''
        pages = subcategory.articlesList(False)
        for Page in pagegenerators.PreloadingGenerator(pages, 100):
            #if not crawlerFile(Page.title()):
            modification(Page.title())
Example 26
def main():
    summary_commandline, template, gen = None, [], None
    exceptions, PageTitles, namespaces = [], [], []
    cat = ''
    autoText, autoTitle = False, False
    genFactory = pagegenerators.GeneratorFactory()
    # Set to True to enter the page title interactively instead of
    # reading the command line arguments.
    manual_input = False
    if not manual_input:
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith('-page:'):
                if len(arg) == 6:
                    PageTitles.append(
                        wikipedia.input(u'Which page do you want to change?'))
                else:
                    PageTitles.append(arg[6:])
            elif arg.startswith('-cat:'):
                if len(arg) == 5:
                    cat = wikipedia.input(
                        u'Which category do you want to change?')
                else:
                    cat = 'Category:' + arg[5:]
            elif arg.startswith('-template:'):
                if len(arg) == 10:
                    template.append(
                        wikipedia.input(u'Which template do you want to change?'))
                else:
                    template.append('Template:' + arg[10:])
            elif arg.startswith('-except:'):
                exceptions.append(arg[8:])
            elif arg.startswith('-namespace:'):
                namespaces.append(int(arg[11:]))
            elif arg.startswith('-ns:'):
                namespaces.append(int(arg[4:]))
            elif arg.startswith('-summary:'):
                wikipedia.setAction(arg[9:])
                summary_commandline = True
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
    else:
        PageTitles = [raw_input(u'Page:> ').decode('utf-8')]
    if cat != '':
        facatfalist = facatlist(cat)
        if facatfalist != False:
            run(facatfalist)
    if PageTitles:
        pages = [wikipedia.Page(faSite, PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    # pageNumber: number of pages to load at the same time
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    run(preloadingGen)
Example 27
def main():
    # HACK: This can be removed when pywikipedia bug 3315395 has been fixed
    safetyLock = 'birthcat-unlock.dat'
    if not os.path.exists(safetyLock):
        choice = pywikibot.inputChoice(
            u'Have you patched textlib.py in pywikipedia?', ['Yes', 'No'],
            ['y', 'N'], 'N')
        if choice == 'y':
            open(safetyLock, 'w').close()
        else:
            return False
    # END OF HACK

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitleParts = []
    # If dry is True, doesn't do any real changes, but only show
    # what would have been changed.
    dry = False
    # If auto is True, run in autonomous mode.
    auto = False

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith("-dry"):
            dry = True
        elif arg.startswith("-auto"):
            auto = True
        else:
            # check if a standard argument like
            # -start:XYZ or -ref:Asdf was given.
            if not genFactory.handleArg(arg):
                pageTitleParts.append(arg)

    if pageTitleParts:
        # We will only work on a single page.
        pageTitle = ' '.join(pageTitleParts)
        page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        gen = iter([page])

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = BirthCatBot(gen, auto, dry)
        bot.run()
    else:
        pywikibot.showHelp()
Example 28
def modification(PageHS):
    page = Page(site, PageHS)
    if page.exists():
        if page.namespace() != 0 and \
           page.title() != u'Utilisateur:JackBot/test':
            return
        else:
            try:
                PageEnd = page.getRedirectTarget()
            except wikipedia.NoPage:
                print "NoPage"
                return
            gen2 = pagegenerators.ReferringPageGenerator(page)
            for PageCourante in pagegenerators.PreloadingGenerator(gen2, 100):
                print(PageCourante.title().encode(config.console_encoding,
                                                  'replace'))
                try:
                    PageBegin = PageCourante.get()
                except wikipedia.NoPage:
                    print "NoPage"
                    return
                except wikipedia.IsRedirectPage:
                    print "Redirect page"
                    return
                except wikipedia.LockedPage:
                    print "Locked/protected page"
                    return
                except wikipedia.ServerError:
                    print "ServerError"
                    return
                except wikipedia.NoSuchSite:
                    print "NoSuchSite"
                    return
                except wikipedia.InvalidTitle:
                    print "InvalidTitle"
                    return
                PageTemp = PageBegin
                # Replace [[PageHS]] with [[<target>|PageHS]], keeping the
                # old title as the visible label.
                while PageTemp.find(u'[[' + PageHS + u']]') != -1:
                    PageTemp = PageTemp[
                        0:PageTemp.find(u'[[' + PageHS + u']]') +
                        2] + PageEnd.title() + u'|' + PageHS + PageTemp[
                            PageTemp.find(u'[[' + PageHS + u']]') +
                            len(u'[[' + PageHS + u']]') - 2:len(PageTemp)]
                # Replace [[PageHS| with [[<target>|, keeping the existing
                # label untouched.
                while PageTemp.find(u'[[' + PageHS + u'|') != -1:
                    PageTemp = PageTemp[
                        0:PageTemp.find(u'[[' + PageHS + u'|') +
                        2] + PageEnd.title() + PageTemp[
                            PageTemp.find(u'[[' + PageHS + u'|') +
                            len(u'[[' + PageHS + u'|') - 1:len(PageTemp)]
                if PageTemp != PageBegin:
                    sauvegarde(PageCourante, PageTemp)
            if PageHS.find(u'/') != -1 or PageHS.find(u' - ') != -1:
                page.delete(u'Suppression après gestion des pages liées',
                            u'',
                            throttle=True)
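
The two while loops above splice the replacement by hand, but they amount to two plain string substitutions. A standalone sketch of the same rewrite (the helper name and sample text are illustrative, not part of the original bot):

def relink(text, old_title, new_title):
    # [[Old]]       -> [[New|Old]]   (keep the old title as the label)
    text = text.replace(u'[[' + old_title + u']]',
                        u'[[' + new_title + u'|' + old_title + u']]')
    # [[Old|label]] -> [[New|label]] (the existing label is kept)
    text = text.replace(u'[[' + old_title + u'|',
                        u'[[' + new_title + u'|')
    return text

print relink(u'See [[Foo]] and [[Foo|bar]].', u'Foo', u'Qux')
# prints: See [[Qux|Foo]] and [[Qux|bar]].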
Example 29
def main():
    genFactory = pagegenerators.GeneratorFactory()

    PageTitles = []
    xmlFilename = None
    always = False
    ignorepdf = False
    limit = None
    namespaces = []
    generator = None
    for arg in pywikibot.handleArgs():
        if arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg.startswith('-summary:'):
            pywikibot.setAction(arg[9:])
        elif arg == '-always':
            always = True
        elif arg == '-ignorepdf':
            ignorepdf = True
        elif arg.startswith('-limit:'):
            limit = int(arg[7:])
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
        else:
            genFactory.handleArg(arg)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        generator = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('reflinks')
        return
    generator = pagegenerators.PreloadingGenerator(generator, pageNumber=50)
    generator = pagegenerators.RedirectFilterPageGenerator(generator)
    bot = ReferencesRobot(generator, always, limit, ignorepdf)
    bot.run()
Example 30
def crawlerCat(category, recursif, apres):
    modifier = False
    cat = catlib.Category(site, category)
    pages = cat.articlesList(False)
    #gen = pagegenerators.NamespaceFilterPageGenerator(pages, [ns])  # broken on Commons
    for Page in pagegenerators.PreloadingGenerator(pages, 100):
        if not apres or modifier:
            modification(Page.title())  #crawlerLink(Page.title())
        elif Page.title() == apres:
            modifier = True
    if recursif:
        subcat = cat.subcategories(recurse=True)
        for subcategory in subcat:
            title = subcategory.title()
            # Skip categories for sound files and Wikipedia/Wikinews material.
            if (title.find(u'.ogg') == -1 and title.find(u'spoken') == -1
                    and title.find(u'Wikipedia') == -1
                    and title.find(u'Wikinews') == -1):
                pages = subcategory.articlesList(False)
                for Page in pagegenerators.PreloadingGenerator(pages, 100):
                    modification(Page.title())