Example 1
def getLinks(wtext):
    #adapted from linkedPages() http://svn.wikimedia.org/svnroot/pywikipedia/trunk/pywikipedia/wikipedia.py
    links = []
    wtext = wikipedia.removeLanguageLinks(wtext, site)
    wtext = wikipedia.removeCategoryLinks(wtext, site)
    # remove HTML comments, pre, nowiki, and includeonly sections
    # from text before processing
    wtext = wikipedia.removeDisabledParts(wtext)
    # resolve {{ns:-1}} or {{ns:Help}}
    wtext = site.resolvemagicwords(wtext)
    for match in Rlink.finditer(wtext):
        title = match.group('title')
        title = title.replace("_", " ").strip(" ")
        if title.startswith("#"):  # this is an internal section link
            continue
        if not site.isInterwikiLink(title):
            if title.startswith("#"):  # [[#intrasection]] same article
                continue
            # removing sections [[other article#section|blabla]]
            title = title.split('#')[0]
            # uppercase the first character
            title = '%s%s' % (title[:1].upper(), title[1:])
            title = title.strip()
            # skip files, categories, etc.
            if (title.startswith(":") or title.startswith("File:")
                    or title.startswith("Image:")
                    or title.startswith("Category:")):
                continue
            if title and title not in links:
                links.append(title)

    return links
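Note that `wikipedia`, `site` and the precompiled `Rlink` regex all come from the surrounding pywikipedia (compat) script and are not shown here. The following self-contained sketch approximates the same extraction with a simplified, assumed link pattern:

import re

# Simplified stand-in for the script's Rlink pattern (an assumption, not the original).
Rlink = re.compile(r'\[\[(?P<title>[^\]\|\[<>{}]*)(\|.*?)?\]\]')

sample = u"Text with [[Python (programming language)|Python]], [[#Section]] and [[File:Demo.jpg]]."

links = []
for match in Rlink.finditer(sample):
    title = match.group('title').replace('_', ' ').strip()
    if not title or title.startswith('#'):        # intra-page section link
        continue
    if title.split(':')[0] in ('File', 'Image', 'Category'):  # files, cats, etc.
        continue
    if title not in links:
        links.append(title)

for title in links:
    print(title)   # Python (programming language)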
Example 2
def buildDescription(flinfoDescription=u'',
                     flickrreview=False,
                     reviewer=u'',
                     override=u'',
                     addCategory=u'',
                     removeCategories=False):
    ''' Build the final description for the image. The description is based on
    the info from flickrinfo and improved.

    '''
    description = flinfoDescription
    if removeCategories:
        description = pywikibot.removeCategoryLinks(
            description, pywikibot.getSite('commons', 'commons'))
    if override:
        description = description.replace(u'{{cc-by-sa-2.0}}\n', u'')
        description = description.replace(u'{{cc-by-2.0}}\n', u'')
        description = description.replace(u'{{flickrreview}}\n', u'')
        description = description.replace(
            u'{{copyvio|Flickr, licensed as "All Rights Reserved" which is not a free license --~~~~}}\n',
            u'')
        description = description.replace(u'=={{int:license}}==',
                                          u'=={{int:license}}==\n' + override)
    elif flickrreview:
        if reviewer:
            description = description.replace(
                u'{{flickrreview}}', u'{{flickrreview|' + reviewer +
                '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}'
            )
    if addCategory:
        description = description.replace(u'{{subst:unc}}\n', u'')
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    description = description.replace(u'\r\n', u'\n')
    return description
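A quick, hypothetical way to exercise the function above: with `removeCategories=False` and no `override`, only plain string replacements run, so no live pywikibot site is needed. The sample description and reviewer name below are made up:

sample = (u'{{Information\n'
          u'|Description=A test image\n'
          u'}}\n'
          u'=={{int:license}}==\n'
          u'{{flickrreview}}\n'
          u'{{cc-by-2.0}}\n')

print(buildDescription(flinfoDescription=sample,
                       flickrreview=True,
                       reviewer=u'ExampleUser',
                       addCategory=u'Example category'))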
Example 3
def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'',
                     override=u'', addCategory=u'', removeCategories=False, photoInfo=None):
    ''' Build the final description for the image. The description is based on
    the info from flickrinfo and improved.

    '''
    description = flinfoDescription
    description = description.replace(u'|Description=', u'|Description={{' + ripper_config['lang'] + '|1=')
    description = description.replace(u'\n|Source=', u'}}\n{{' + ripper_config['monument_template'] + '|' + getMonumentId(photoInfo) + '}}\n|Source=')
    if removeCategories:
        description = pywikibot.removeCategoryLinks(description,
                                                    pywikibot.getSite(
                                                        'commons', 'commons'))
    if override:
        description = description.replace(u'{{cc-by-sa-2.0}}\n', u'')
        description = description.replace(u'{{cc-by-2.0}}\n', u'')
        description = description.replace(u'{{flickrreview}}\n', u'')
        description = description.replace(
            u'{{copyvio|Flickr, licensed as "All Rights Reserved" which is not a free license --~~~~}}\n',
            u'')
        description = description.replace(u'=={{int:license}}==',
                                          u'=={{int:license}}==\n' + override)
    elif flickrreview:
        if reviewer:
            description = description.replace(u'{{flickrreview}}',
                                              u'{{flickrreview|' + reviewer +
                                              '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}')
											  
    description = description + u'\n{{Wiki Loves Monuments 2011|' + ripper_config['country'] + '}}'
    description = description + u'\n' + ripper_config['categories'] + '\n'
    description = description.replace(u'{{subst:unc}}\n', u'')
    if addCategory:
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    description = description.replace(u'\r\n', u'\n')
    return description
Example 4
def cleanUpCategories(description=''):
    '''
    Filter the categories in the description using the functions in imagerecat
    '''
    #Get the list of current categories
    categoryPages = wikipedia.getCategoryLinks(description,
                                               wikipedia.getSite())

    #Make it a list of strings (not page objects)
    categories = []
    for cat in categoryPages:
        categories.append(cat.titleWithoutNamespace())

    #Strip the categories of the current description
    description = wikipedia.removeCategoryLinks(description,
                                                wikipedia.getSite())

    #Filter the list of categories
    categories = imagerecat.applyAllFilters(categories)

    #If we have a category, remove the uncat template
    if not (categories == ''):
        description = description.replace(u'{{subst:unc}}', u'')

    #Add the categories to the description again
    description = description + u'\n'
    for category in categories:
        #print u'Category : ' + category
        description = description + u'[[Category:' + category + u']]\n'
    return description
Example 5
def cleanUpCategories(description=''):
    '''
    Filter the categories in the description using the functions in imagerecat
    '''
    #Get the list of current categories
    categoryPages = wikipedia.getCategoryLinks(description,
                                               wikipedia.getSite())

    #Make it a list of strings (not page objects)
    categories = []
    for cat in categoryPages:
        categories.append(cat.titleWithoutNamespace())

    #Strip the categories of the current description
    description = wikipedia.removeCategoryLinks(description,
                                                wikipedia.getSite())

    #Filter the list of categories
    categories = imagerecat.applyAllFilters(categories)

    #If we have a category, remove the uncat template
    if not (categories == ''):
        description = description.replace(u'{{subst:unc}}', u'')

    #Add the categories to the description again
    description = description + u'\n'
    for category in categories:
        #print u'Category : ' + category
        description = description + u'[[Category:' + category + u']]\n'
    return description
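The function depends on the compat `wikipedia` module and on `imagerecat.applyAllFilters()`. The sketch below reproduces the same strip → filter → re-append flow in a self-contained way, using a plain regex and a stand-in filter (the regex and the de-duplicating filter are illustrative assumptions):

import re

CAT_RE = re.compile(r'\[\[Category:([^\]\|]+)(?:\|[^\]]*)?\]\]\n?')

def clean_up_categories_sketch(description, filter_cats=lambda cats: sorted(set(cats))):
    cats = CAT_RE.findall(description)          # current categories
    description = CAT_RE.sub('', description)   # strip them from the text
    cats = filter_cats(cats)                    # stand-in for imagerecat.applyAllFilters()
    if cats:
        description = description.replace(u'{{subst:unc}}', u'')
    description = description + u'\n'
    for cat in cats:
        description = description + u'[[Category:' + cat + u']]\n'
    return description

print(clean_up_categories_sketch(
    u'A photo.\n[[Category:B]]\n[[Category:A]]\n[[Category:B]]\n'))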
Example 6
def replaceCategory(image=None, parents=[], newcat=u''):
    '''
    Remove all parent categories and add newcat
    '''
    result = 0
    newcats = []
    if not newcat == u'':
        currentCats = imagerecat.getCurrentCats(image)
        workingCategories = currentCats
        workingCategories.append(newcat)
        # Adding parents if the category filter is lagging.
        # The bot often works on new categories. In these cases the filter does know the parent categories
        workingCategories = workingCategories + parents
        for cat in imagerecat.applyAllFilters(workingCategories):
            #Now remove those parents again
            if cat not in parents:
                newcats.append(cat)
        if not (set(currentCats) == set(newcats)):
            newtext = wikipedia.removeCategoryLinks(image.get(), image.site()) + u'\n'
            for category in newcats:
                newtext = newtext + u'[[Category:' + category + u']]\n'
            comment = u'Moving image to (a subcategory of) [[Category:' + newcat + u']] and trying to filter categories'
            wikipedia.output(image.title())
            wikipedia.showDiff(image.get(), newtext)
            image.put(newtext, comment)
            result = 1
    return result
Example 7
def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'',
                     override=u'', addCategory=u'', removeCategories=False):
    ''' Build the final description for the image. The description is based on
    the info from flickrinfo and improved.

    '''
    description = flinfoDescription
    if removeCategories:
        description = pywikibot.removeCategoryLinks(description,
                                                    pywikibot.getSite(
                                                        'commons', 'commons'))
    if override:
        description = description.replace(u'{{cc-by-sa-2.0}}\n', u'')
        description = description.replace(u'{{cc-by-2.0}}\n', u'')
        description = description.replace(u'{{flickrreview}}\n', u'')
        description = description.replace(
            u'{{copyvio|Flickr, licensed as "All Rights Reserved" which is not a free license --~~~~}}\n',
            u'')
        description = description.replace(u'=={{int:license}}==',
                                          u'=={{int:license}}==\n' + override)
    elif flickrreview:
        if reviewer:
            description = description.replace(u'{{flickrreview}}',
                                              u'{{flickrreview|' + reviewer +
                                              '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}')
    if addCategory:
        description = description.replace(u'{{subst:unc}}\n', u'')
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    description = description.replace(u'\r\n', u'\n')
    return description
Example 8
def putAfterTemplate(page, template, toadd, loose=True):
    '''
    Try to put text after template.
    If the template is not found return False if loose is set to False
    If loose is set to True: Remove interwiki's, categories, add template, restore categories, restore interwiki's.

    Based on cc-by-sa-3.0 code by Dschwen
    '''
    oldtext = page.get()
    newtext = u''

    templatePosition = oldtext.find(u'{{%s' % (template, ))

    if templatePosition >= 0:
        previousChar = u''
        currentChar = u''
        templatePosition += 2
        curly = 1
        square = 0

        while templatePosition < len(oldtext):
            currentChar = oldtext[templatePosition]

            if currentChar == u'[' and previousChar == u'[':
                square += 1
                previousChar = u''
            if currentChar == u']' and previousChar == u']':
                square -= 1
                previousChar = u''
            if currentChar == u'{' and previousChar == u'{':
                curly += 1
                previousChar = u''
            if currentChar == u'}' and previousChar == u'}':
                curly -= 1
                previousChar = u''

            previousChar = currentChar
            templatePosition += 1

            if curly == 0 and square <= 0:
                # Found end of template
                break
        newtext = oldtext[:templatePosition] + u'\n' + toadd + oldtext[
            templatePosition:]

    else:
        if loose:
            newtext = oldtext
            cats = wikipedia.getCategoryLinks(newtext)
            ll = wikipedia.getLanguageLinks(newtext)
            newtext = wikipedia.removeLanguageLinks(newtext)
            newtext = wikipedia.removeCategoryLinks(newtext)
            newtext = newtext + u'\n' + toadd
            newtext = wikipedia.replaceCategoryLinks(newtext, cats)
            newtext = wikipedia.replaceLanguageLinks(newtext, ll)

    return newtext
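The character-by-character brace counting is the core of the function. Here is a stand-alone sketch of that scan on made-up wikitext, without the page object or the loose fallback (the helper name and sample text are illustrative only):

def end_of_template(text, template):
    # Scan past the opening {{template and count {{ / }} (and [[ / ]]) pairs
    # until the template that was opened first is closed again.
    pos = text.find(u'{{%s' % template)
    if pos < 0:
        return -1
    pos += 2
    curly, square, previous = 1, 0, u''
    while pos < len(text):
        current = text[pos]
        if previous == u'[' and current == u'[':
            square += 1
        if previous == u']' and current == u']':
            square -= 1
        if previous == u'{' and current == u'{':
            curly += 1
        if previous == u'}' and current == u'}':
            curly -= 1
        previous = current
        pos += 1
        if curly == 0 and square <= 0:
            break
    return pos

sample = (u'{{Information\n'
          u'|Description={{en|A nested {{template}} here}}\n'
          u'|Date=2011\n'
          u'}}\n'
          u'[[Category:Demo]]')
end = end_of_template(sample, u'Information')
print(sample[:end] + u'\n{{Added below the template}}' + sample[end:])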
Example 9
def putAfterTemplate(page, template, toadd, loose=True):
    '''
    Try to put text after template.
    If the template is not found return False if loose is set to False
    If loose is set to True: Remove interwiki's, categories, add template, restore categories, restore interwiki's.

    Based on cc-by-sa-3.0 code by Dschwen
    '''
    oldtext = page.get()
    newtext = u''

    templatePosition = oldtext.find(u'{{%s' % (template, ))

    if templatePosition >= 0:
        previousChar = u''
        currentChar = u''
        templatePosition += 2
        curly = 1
        square = 0

        while templatePosition < len(oldtext):
            currentChar = oldtext[templatePosition]

            if currentChar == u'[' and previousChar == u'[':
                square += 1
                previousChar = u''
            if currentChar == u']' and previousChar == u']':
                square -= 1
                previousChar = u''
            if currentChar == u'{' and previousChar == u'{':
                curly += 1
                previousChar = u''
            if currentChar == u'}' and previousChar == u'}':
                curly -= 1
                previousChar = u''

            previousChar = currentChar
            templatePosition += 1

            if curly == 0 and square <= 0:
                # Found end of template
                break
        newtext = oldtext[:templatePosition] + u'\n' + toadd + oldtext[
            templatePosition:]

    else:
        if loose:
            newtext = oldtext
            cats = wikipedia.getCategoryLinks(newtext)
            ll = wikipedia.getLanguageLinks(newtext)
            newtext = wikipedia.removeLanguageLinks(newtext)
            newtext = wikipedia.removeCategoryLinks(newtext)
            newtext = newtext + u'\n' + toadd
            newtext = wikipedia.replaceCategoryLinks(newtext, cats)
            newtext = wikipedia.replaceLanguageLinks(newtext, ll)

    return newtext
Example 10
def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
    ''' Remove the old categories and add the new categories to the image. '''
    newtext = pywikibot.removeCategoryLinks(imagepage.get(), imagepage.site())
    if not(onlyFilter):
        newtext = removeTemplates(newtext)
        newtext = newtext + getCheckCategoriesTemplate(usage, galleries,
                                                       len(newcats))
    newtext = newtext + u'\n'
    for category in newcats:
        newtext = newtext + u'[[Category:' + category + u']]\n'
    if(onlyFilter):
        comment = u'Filtering categories'
    else:
        comment = u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]'
    pywikibot.showDiff(imagepage.get(), newtext)
    imagepage.put(newtext, comment)
    return
Example 11
def categorizeImage(page, id, cursor, cursor2):
    # get metadata
    metadata = getMetadata(id, cursor)
    # get current text
    oldtext = page.get()
    # get current categories
    currentCategories = []
    for cat in page.categories():
        currentCategories.append(cat.titleWithoutNamespace().strip().replace(
            u' ', u'_'))
    # remove templates
    cleanDescription = wikipedia.removeCategoryLinks(
        imagerecat.removeTemplates(page.get()), wikipedia.getSite())
    # get new categories
    categories = getCategories(metadata, cursor, cursor2, currentCategories)

    if categories and not set(currentCategories) == set(categories):
        description = cleanDescription + u'\n\n' + categories
        comment = u'Trying to find better categories for this [[Commons:Batch uploading/Geograph|Geograph]] image'
        wikipedia.output(description)
        wikipedia.showDiff(oldtext, description)
        page.put(description, comment)
Example 12
def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'', addCategory=u'', removeCategories=False, rijksmonumentid=1):
    '''
    Build the final description for the image. The description is based on the info from flickrinfo and improved.
    '''
    description = flinfoDescription

    description = description.replace(u'\n|Source=[http://www.flickr.com/', u'\n{{Rijksmonument|%s}}\n|Source=[http://www.flickr.com/' % (rijksmonumentid,))

    if removeCategories:
        description = wikipedia.removeCategoryLinks(description, wikipedia.getSite('commons', 'commons'))

    if flickrreview:
        if reviewer:
            description = description.replace(u'{{flickrreview}}', u'{{flickrreview|' + reviewer + '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}')

    if addCategory:
        description = description.replace(u'{{subst:unc}}\n', u'')
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    description = description.replace(u'\r\n', u'\n')
    return description
Example 13
def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'', addCategory=u'', removeCategories=False, rijksmonumentid=1):
    '''
    Build the final description for the image. The description is based on the info from flickrinfo and improved.
    '''
    description = flinfoDescription

    description = description.replace(u'\n|Source=[http://www.flickr.com/', u'\n{{Rijksmonument|%s}}\n|Source=[http://www.flickr.com/' % (rijksmonumentid,))

    if removeCategories:
        description = wikipedia.removeCategoryLinks(description, wikipedia.getSite('commons', 'commons'))
    
    if flickrreview:
        if reviewer:
            description = description.replace(u'{{flickrreview}}', u'{{flickrreview|' + reviewer + '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}')
    
    if addCategory:
        description = description.replace(u'{{subst:unc}}\n', u'')
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    description = description.replace(u'\r\n', u'\n')
    return description
Example 14
    def getNewFieldsFromFreetext(self, imagepage):
        '''
        Try to extract fields from free text for the new information template.
        '''
        text = imagepage.get()
        #text = re.sub(u'== Summary ==', u'', text, re.IGNORECASE)
        #text = re.sub(u'== Licensing ==', u'', text, re.IGNORECASE)
        #text = re.sub(u'\{\{(self|self2)\|[^\}]+\}\}', u'', text, re.IGNORECASE)

        for toRemove in sourceGarbage[imagepage.site().language()]:
            text = re.sub(toRemove, u'', text, flags=re.IGNORECASE)

        for (regex, repl) in licenseTemplates[imagepage.site().language()]:
            text = re.sub(regex, u'', text, flags=re.IGNORECASE)

        text = pywikibot.removeCategoryLinks(text, imagepage.site()).strip()

        description = self.convertLinks(text.strip(), imagepage.site())
        date = self.getUploadDate(imagepage)
        source = self.getSource(imagepage)
        author = self.getAuthorText(imagepage)
        return (description, date, source, author)
Example 15
def categorizeImage(page, id, cursor, cursor2):
    # get metadata
    metadata = getMetadata(id, cursor)
    # get current text
    oldtext = page.get()
    # get current categories
    currentCategories = []
    for cat in page.categories():
        currentCategories.append(cat.titleWithoutNamespace().strip().replace(
            u' ', u'_'))
    # remove templates
    cleanDescription = wikipedia.removeCategoryLinks(
        imagerecat.removeTemplates(page.get()), wikipedia.getSite())
    # get new categories
    categories = getCategories(metadata, cursor, cursor2, currentCategories)

    if categories and not set(currentCategories) == set(categories):
        description = cleanDescription + u'\n\n' + categories
        comment = u'Trying to find better categories for this [[Commons:Batch uploading/Geograph|Geograph]] image'
        wikipedia.output(description)
        wikipedia.showDiff(oldtext, description)
        page.put(description, comment)
Example 16
    def getNewFieldsFromFreetext(self, imagepage):
        '''
        Try to extract fields from free text for the new information template.
        '''
        text = imagepage.get()
        #text = re.sub(u'== Summary ==', u'', text, re.IGNORECASE)
        #text = re.sub(u'== Licensing ==', u'', text, re.IGNORECASE)
        #text = re.sub(u'\{\{(self|self2)\|[^\}]+\}\}', u'', text, re.IGNORECASE)

        for toRemove in sourceGarbage[imagepage.site().language()]:
            text = re.sub(toRemove, u'', text, flags=re.IGNORECASE)

        for (regex, repl) in licenseTemplates[imagepage.site().language()]:
            text = re.sub(regex, u'', text, flags=re.IGNORECASE)

        text = pywikibot.removeCategoryLinks(text, imagepage.site()).strip()

        description = self.convertLinks(text.strip(), imagepage.site())
        date = self.getUploadDate(imagepage)
        source = self.getSource(imagepage)
        author = self.getAuthorText(imagepage)
        return (description, date, source, author)
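`sourceGarbage` and `licenseTemplates` are module-level tables in the original script, keyed by the wiki's language code, and are not shown here. The sketch below runs the same clean-up loops with made-up entries (patterns and sample text are illustrative assumptions; the `repl` part of each tuple is unused, exactly as above):

import re

sourceGarbage = [r'== ?Summary ?==', r'== ?Licensing ?==']
licenseTemplates = [(r'\{\{GFDL(\|[^}]+)?\}\}', u'{{GFDL}}'),
                    (r'\{\{self\|[^}]+\}\}', u'')]

text = (u'== Summary ==\n'
        u'A photo of a lighthouse.\n'
        u'== Licensing ==\n'
        u'{{self|GFDL|cc-by-sa-3.0}}\n')

for toRemove in sourceGarbage:
    text = re.sub(toRemove, u'', text, flags=re.IGNORECASE)
for (regex, repl) in licenseTemplates:
    text = re.sub(regex, u'', text, flags=re.IGNORECASE)

print(text.strip())   # A photo of a lighthouse.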
Example 17
def add_text(
    page=None,
    addText=None,
    summary=None,
    regexSkip=None,
    regexSkipUrl=None,
    always=False,
    up=False,
    putText=True,
    oldTextGiven=None,
    reorderEnabled=True,
    create=False,
):
    if not addText:
        raise NoEnoughData("You have to specify what text you want to add!")
    if not summary:
        summary = i18n.twtranslate(pywikibot.getSite(), "add_text-adding", {"adding": addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.

    errorCount = 0
    site = pywikibot.getSite()
    pathWiki = site.family.nicepath(site.lang)

    if putText:
        pywikibot.output(u"Loading %s..." % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!" % page.title())
                text = u""
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = "%s%s" % (pathWiki, page.urlname())
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                u"""Exception! regex (or word) used with -exceptUrl is in the page. Skip!
Match was: %s"""
                % result
            )
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                u"""Exception! regex (or word) used with -except is in the page. Skip!
Match was: %s"""
                % result
            )
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace("\\n", config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = pywikibot.replaceCategoryLinks(newtext, categoriesInside, site, True)
            # Dealing the stars' issue
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile("(\{\{(?:template:|)%s\|.*?\}\}[\s]*)" % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub("", newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += "%s%s" % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside, site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.inputChoice(
                    u"Do you want to accept these changes?",
                    ["Yes", "No", "All", "open in Browser"],
                    ["y", "n", "a", "b"],
                    "n",
                )
                if choice == "a":
                    always = True
                elif choice == "n":
                    return (False, False, always)
                elif choice == "b":
                    webbrowser.open("http://%s%s" % (page.site.hostname(), page.site.nice_get_address(page.title())))
                    pywikibot.input("Press Enter when finished in browser.")
            if always or choice == "y":
                try:
                    if always:
                        page.put(newtext, summary, minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary, minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u"Edit conflict! skip!")
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.maxretries:
                        pywikibot.output(u"Server Error! Wait..")
                        time.sleep(5)
                        continue
                    else:
                        raise pywikibot.ServerError(u"Fifth Server Error!")
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(u"Cannot change %s because of blacklist entry %s" % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u"Error putting page: %s" % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u"Skipping %s (locked page)" % page.title())
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
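The "stars" handling is easiest to see on its own. `starsList` is defined elsewhere in the script (the longer `add_text` variants below show the full list), and `config.line_separator` is normally a newline; the two-entry list and wikitext below are made up:

import re

starsList = ['link[ _]fa', 'link[ _]ga']   # shortened stand-in for the real list
line_separator = u'\n'                     # stand-in for config.line_separator

text = (u'Some article text.\n'
        u'{{Link GA|de}}\n'
        u'{{Link FA|en}}\n'
        u'[[Category:Demo]]\n')

newtext = text
allstars = []
for star in starsList:
    regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
    found = regex.findall(text)
    if found:
        newtext = regex.sub('', newtext)
        allstars += found
if allstars:
    # Re-append the star templates, sorted, at the bottom of the page text.
    newtext = newtext.strip() + line_separator * 2
    allstars.sort()
    for element in allstars:
        newtext += u'%s%s' % (element.strip(), line_separator)

print(newtext)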
Example 18
def add_text(page = None, addText = None, summary = None, regexSkip = None, regexSkipUrl = None,
             always = False, up = False, putText = True, oldTextGiven = None):
    if not addText:
        raise NoEnoughData('You have to specify what text you want to add!')
    if not summary:
        summary = wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg) % addText)

    # When a page is tagged as "really well written" it has a star in the interwiki links.
    # This is a list of all the templates used (in regex format) to make the stars appear.
    starsList = ['link[ _]fa', 'link[ _]adq', 'enllaç[ _]ad',
                 'link[ _]ua', 'legătură[ _]af', 'destacado',
                 'ua', 'liên k[ _]t[ _]chọn[ _]lọc']

    errorCount = 0
    site = wikipedia.getSite()
    # /wiki/ is not always the right path in non-wiki projects
    pathWiki = site.family.nicepath(site.lang)

    if putText:
        wikipedia.output(u'Loading %s...' % page.title())
    if oldTextGiven == None:
        try:
            text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"%s doesn't exist, skip!" % page.title())
            return (False, always) # continue
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"%s is a redirect, skip!" % page.title())
            return (False, always) # continue
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl != None:
        url = '%s%s' % (pathWiki, page.urlname())
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            wikipedia.output(u'Exception! regex (or word) used with -exceptUrl is in the page. Skip!')
            return (False, always) # continue
    if regexSkip != None:
        result = re.findall(regexSkip, text)
        if result != []:
            wikipedia.output(u'Exception! regex (or word) used with -except is in the page. Skip!')
            return (False, always) # continue
    # If not up, text put below
    if not up:
        newtext = text
        # Getting the categories
        categoriesInside = wikipedia.getCategoryLinks(newtext, site)
        # Deleting the categories
        newtext = wikipedia.removeCategoryLinks(newtext, site)
        # Getting the interwiki
        interwikiInside = wikipedia.getLanguageLinks(newtext, site)
        # Removing the interwiki
        newtext = wikipedia.removeLanguageLinks(newtext, site)
        #nn got a message between the categories and the iw's and they want to keep it there, first remove it
        if (site.language()==u'nn'):
            newtext = newtext.replace(nn_iw_msg, '')
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        # Adding the text
        newtext += u"\n%s" % addText
        # Reputting the categories
        newtext = wikipedia.replaceCategoryLinks(newtext, categoriesInside, site, True)
        #Put the nn iw message back
        if (site.language()==u'nn'):
            newtext = newtext + u'\n' + nn_iw_msg       
        # Dealing the stars' issue
        starsListInPage = list()
        for star in starsList:
            regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}\n)' % star, re.I)
            risultato = regex.findall(newtext)
            if risultato != []:
                newtext = regex.sub('', newtext)
                for element in risultato:
                    newtext += '\n%s' % element
        # Adding the interwiki
        newtext = wikipedia.replaceLanguageLinks(newtext, interwikiInside, site)
    # If instead the text must be added above...
    else:
        newtext = addText + '\n' + text
    if putText and text != newtext:
        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
        wikipedia.showDiff(text, newtext)
    choice = ''
    # Let's put the changes.
    while 1:
        # If someone load it as module, maybe it's not so useful to put the text in the page
        if putText:
            if not always:
                choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
            if choice == 'a':
                always = True
            if choice == 'n':
                return (False, always)
            if choice == 'y' or always:
                try:
                    if always:
                        page.put(newtext, summary)
                    else:
                        page.put_async(newtext, summary)
                except wikipedia.EditConflict:
                    wikipedia.output(u'Edit conflict! skip!')
                    return (False, always)
                except wikipedia.ServerError:
                    errorCount += 1
                    if errorCount < 5:
                        wikipedia.output(u'Server Error! Wait..')
                        time.sleep(3)
                        continue
                    else:
                        raise wikipedia.ServerError(u'Fifth Server Error!')
                except wikipedia.SpamfilterError, e:
                    wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
                    return (False, always)
                except wikipedia.PageNotSaved, error:
                    wikipedia.output(u'Error putting page: %s' % error.args)
                    return (False, always)
                except wikipedia.LockedPage:
                    wikipedia.output(u'Skipping %s (locked page)' % page.title())
                    return (False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, always)
Example 19
def add_text(page = None, addText = None, summary = None, regexSkip = None,
             regexSkipUrl = None, always = False, up = False, putText = True,
             oldTextGiven = None, reorderEnabled = True, create=False):
    if not addText:
        raise NoEnoughData('You have to specify what text you want to add!')
    if not summary:
        summary = i18n.twtranslate(pywikibot.getSite(), 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    starsList = [
        u'bueno',
        u'bom interwiki',
        u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
        u'destacado', u'destaca[tu]',
        u'enllaç[ _]ad',
        u'enllaz[ _]ad',
        u'leam[ _]vdc',
        u'legătură[ _]a[bcf]',
        u'liamm[ _]pub',
        u'lien[ _]adq',
        u'lien[ _]ba',
        u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
        u'liên[ _]kết[ _]chọn[ _]lọc',
        u'ligam[ _]adq',
        u'ligoelstara',
        u'ligoleginda',
        u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
        u'link[ _]sm', u'linkfa',
        u'na[ _]lotura',
        u'nasc[ _]ar',
        u'tengill[ _][úg]g',
        u'ua',
        u'yüm yg',
        u'רא',
        u'وصلة مقالة جيدة',
        u'وصلة مقالة مختارة',
    ]

    errorCount = 0
    site = pywikibot.getSite()
    # /wiki/ is not always the right path in non-wiki projects
    pathWiki = site.family.nicepath(site.lang)

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven == None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always) # continue
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always) # continue
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl != None:
        url = '%s%s' % (pathWiki, page.urlname())
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -exceptUrl is in the page. Skipping!
Match was: %s''' % result)
            return (False, False, always) # continue
    if regexSkip != None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -except is in the page. Skipping!
Match was: %s''' % result)
            return (False, False, always) # continue
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)
            # nn got a message between the categories and the iw's
            # and they want to keep it there, first remove it
            hasCommentLine = False
            if (site.language()==u'nn'):
                regex = re.compile('(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
                found = regex.findall(newtext)
                if found:
                    hasCommentLine = True
                    newtext = regex.sub('', newtext)

            # Adding the text
            newtext += u"\n%s" % addText
            # Reputting the categories
            newtext = pywikibot.replaceCategoryLinks(newtext,
                                                 categoriesInside, site, True)
            #Put the nn iw message back
            if site.language()==u'nn' and (interwikiInside or hasCommentLine):
                newtext = newtext + u'\r\n\r\n' + nn_iw_msg
            # Dealing the stars' issue
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star,
                                   re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip()+'\r\n\r\n'
                allstars.sort()
                for element in allstars:
                    newtext += '%s\r\n' % element.strip()
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside, site)
        else:
            # Adding the text
            newtext += u"\n%s" % addText
    # If instead the text must be added above...
    else:
        newtext = addText + '\n' + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All', 'open in Browser'], ['y', 'N', 'a', 'b'], 'N')
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site().hostname(),
                        page.site().nice_get_address(page.title())
                    ))
                    pywikibot.input("Press Enter when finished in browser.")
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary)
                    else:
                        page.put_async(newtext, summary)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < 5:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(5)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError, e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved, error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
Example 20
def add_text(page=None, addText=None, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    starsList = [
        u'bueno',
        u'bom interwiki',
        u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
        u'destacado', u'destaca[tu]',
        u'enllaç[ _]ad',
        u'enllaz[ _]ad',
        u'leam[ _]vdc',
        u'legătură[ _]a[bcf]',
        u'liamm[ _]pub',
        u'lien[ _]adq',
        u'lien[ _]ba',
        u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
        u'liên[ _]kết[ _]chọn[ _]lọc',
        u'ligam[ _]adq',
        u'ligoelstara',
        u'ligoleginda',
        u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
        u'link[ _]sm', u'linkfa',
        u'na[ _]lotura',
        u'nasc[ _]ar',
        u'tengill[ _][úg]g',
        u'ua',
        u'yüm yg',
        u'רא',
        u'وصلة مقالة جيدة',
        u'وصلة مقالة مختارة',
    ]

    errorCount = 0
    site = pywikibot.getSite()
    pathWiki = site.family.nicepath(site.lang)
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"\n%s" % addText
            # Reputting the categories
            newtext = pywikibot.replaceCategoryLinks(newtext,
                                                     categoriesInside, site,
                                                     True)
            # Dealing the stars' issue
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + '\r\n\r\n'
                allstars.sort()
                for element in allstars:
                    newtext += '%s\r\n' % element.strip()
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside,
                                                     site)
        else:
            newtext += u"\n%s" % addText
    else:
        newtext = addText + '\n' + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        #pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=False)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=False)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < 5:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(5)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError, e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved, error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
Example 21
def add_text(
    page=None,
    addText=None,
    summary=None,
    regexSkip=None,
    regexSkipUrl=None,
    always=False,
    up=False,
    putText=True,
    oldTextGiven=None,
    create=False,
):
    if not addText:
        raise NoEnoughData("You have to specify what text you want to add!")
    if not summary:
        summary = wikipedia.translate(wikipedia.getSite(), msg) % addText[:200]

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    starsList = [
        u"bueno",
        u"cyswllt[ _]erthygl[ _]ddethol",
        u"dolen[ _]ed",
        u"destacado",
        u"destaca[tu]",
        u"enllaç[ _]ad",
        u"enllaz[ _]ad",
        u"leam[ _]vdc",
        u"legătură[ _]a[bcf]",
        u"liamm[ _]pub",
        u"lien[ _]adq",
        u"lien[ _]ba",
        u"liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt",
        u"liên[ _]kết[ _]chọn[ _]lọc",
        u"ligam[ _]adq",
        u"ligoelstara",
        u"ligoleginda",
        u"link[ _][afgu]a",
        u"link[ _]adq",
        u"link[ _]f[lm]",
        u"link[ _]km",
        u"link[ _]sm",
        u"linkfa",
        u"na[ _]lotura",
        u"nasc[ _]ar",
        u"tengill[ _][úg]g",
        u"ua",
        u"yüm yg",
        u"רא",
        u"وصلة مقالة جيدة",
        u"وصلة مقالة مختارة",
    ]

    errorCount = 0
    site = wikipedia.getSite()
    # /wiki/ is not always the right path in non-wiki projects
    pathWiki = site.family.nicepath(site.lang)

    if putText:
        wikipedia.output(u"Loading %s..." % page.title())
    if oldTextGiven == None:
        try:
            text = page.get()
        except wikipedia.NoPage:
            if create:
                wikipedia.output(u"%s doesn't exist, creating it!" % page.title())
                text = u""
            else:
                wikipedia.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)  # continue
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)  # continue
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl != None:
        url = "%s%s" % (pathWiki, page.urlname())
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            wikipedia.output(u"Exception! regex (or word) used with -exceptUrl is in the page. Skip!")
            return (False, False, always)  # continue
    if regexSkip != None:
        result = re.findall(regexSkip, text)
        if result != []:
            wikipedia.output(u"Exception! regex (or word) used with -except is in the page. Skip!")
            return (False, False, always)  # continue
    # If not up, text put below
    if not up:
        newtext = text
        # Getting the categories
        categoriesInside = wikipedia.getCategoryLinks(newtext, site)
        # Deleting the categories
        newtext = wikipedia.removeCategoryLinks(newtext, site)
        # Getting the interwiki
        interwikiInside = wikipedia.getLanguageLinks(newtext, site)
        # Removing the interwiki
        newtext = wikipedia.removeLanguageLinks(newtext, site)
        # nn got a message between the categories and the iw's and they want to keep it there, first remove it
        if site.language() == u"nn":
            newtext = newtext.replace(nn_iw_msg, "")
        # Translating the \\n into binary \n
        addText = addText.replace("\\n", "\n")
        # Adding the text
        newtext += u"\n%s" % addText
        # Reputting the categories
        newtext = wikipedia.replaceCategoryLinks(newtext, categoriesInside, site, True)
        # Put the nn iw message back
        if site.language() == u"nn":
            newtext = newtext + u"\n" + nn_iw_msg
        # Dealing the stars' issue
        allstars = []
        starstext = wikipedia.removeDisabledParts(text)
        for star in starsList:
            regex = re.compile("(\{\{(?:template:|)%s\|.*?\}\}[\s]*)" % star, re.I)
            found = regex.findall(starstext)
            if found != []:
                newtext = regex.sub("", newtext)
                allstars += found
        if allstars != []:
            newtext = newtext.strip() + "\r\n\r\n"
            allstars.sort()
            for element in allstars:
                newtext += "%s\r\n" % element.strip()
        # Adding the interwiki
        newtext = wikipedia.replaceLanguageLinks(newtext, interwikiInside, site)
    # If instead the text must be added above...
    else:
        newtext = addText + "\n" + text
    if putText and text != newtext:
        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
        wikipedia.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = wikipedia.inputChoice(
                    u"Do you want to accept these changes?", ["Yes", "No", "All"], ["y", "N", "a"], "N"
                )
                if choice == "a":
                    always = True
                elif choice == "n":
                    return (False, False, always)
            if always or choice == "y":
                try:
                    if always:
                        page.put(newtext, summary)
                    else:
                        page.put_async(newtext, summary)
                except wikipedia.EditConflict:
                    wikipedia.output(u"Edit conflict! skip!")
                    return (False, False, always)
                except wikipedia.ServerError:
                    errorCount += 1
                    if errorCount < 5:
                        wikipedia.output(u"Server Error! Wait..")
                        time.sleep(5)
                        continue
                    else:
                        raise wikipedia.ServerError(u"Fifth Server Error!")
                except wikipedia.SpamfilterError, e:
                    wikipedia.output(u"Cannot change %s because of blacklist entry %s" % (page.title(), e.url))
                    return (False, False, always)
                except wikipedia.PageNotSaved, error:
                    wikipedia.output(u"Error putting page: %s" % error.args)
                    return (False, False, always)
                except wikipedia.LockedPage:
                    wikipedia.output(u"Skipping %s (locked page)" % page.title())
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
Example 22
def add_text(page=None,
             addText=None,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    if not addText:
        raise NoEnoughData('You have to specify what text you want to add!')
    if not summary:
        summary = i18n.twtranslate(pywikibot.getSite(), 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    starsList = [
        u'bueno',
        u'bom interwiki',
        u'cyswllt[ _]erthygl[ _]ddethol',
        u'dolen[ _]ed',
        u'destacado',
        u'destaca[tu]',
        u'enllaç[ _]ad',
        u'enllaz[ _]ad',
        u'leam[ _]vdc',
        u'legătură[ _]a[bcf]',
        u'liamm[ _]pub',
        u'lien[ _]adq',
        u'lien[ _]ba',
        u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
        u'liên[ _]kết[ _]chọn[ _]lọc',
        u'ligam[ _]adq',
        u'ligoelstara',
        u'ligoleginda',
        u'link[ _][afgu]a',
        u'link[ _]adq',
        u'link[ _]f[lm]',
        u'link[ _]km',
        u'link[ _]sm',
        u'linkfa',
        u'na[ _]lotura',
        u'nasc[ _]ar',
        u'tengill[ _][úg]g',
        u'ua',
        u'yüm yg',
        u'רא',
        u'وصلة مقالة جيدة',
        u'وصلة مقالة مختارة',
    ]

    errorCount = 0
    site = pywikibot.getSite()
    # /wiki/ is not always the right path in non-wiki projects
    pathWiki = site.family.nicepath(site.lang)

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven == None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!" %
                                 page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)  # continue
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)  # continue
    else:
        text = oldTextGiven
    # Decide whether the bot has to skip the page; this is what lets you
    # use both -except and -excepturl
    if regexSkipUrl is not None:
        url = '%s%s' % (pathWiki, page.urlname())
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                u'''Exception! regex (or word) used with -exceptUrl is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)  # continue
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                u'''Exception! regex (or word) used with -except is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)  # continue
    # If not up, the text is appended at the bottom
    if not up:
        newtext = text
        # Translate the literal '\\n' sequence into a real newline
        addText = addText.replace('\\n', '\n')
        if reorderEnabled:
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)
            # nn.wikipedia keeps a comment line between the categories and
            # the interwikis; remove it here so it can be put back later
            hasCommentLine = False
            if site.language() == u'nn':
                regex = re.compile(
                    '(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)'
                )
                found = regex.findall(newtext)
                if found:
                    hasCommentLine = True
                    newtext = regex.sub('', newtext)

            # Adding the text
            newtext += u"\n%s" % addText
            # Put the categories back
            newtext = pywikibot.replaceCategoryLinks(newtext, categoriesInside,
                                                     site, True)
            # Put the nn interwiki comment back (nn_iw_msg is a module-level
            # constant in the full script)
            if site.language() == u'nn' and (interwikiInside
                                             or hasCommentLine):
                newtext = newtext + u'\r\n\r\n' + nn_iw_msg
            # Deal with the "star" templates
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + '\r\n\r\n'
                allstars.sort()
                for element in allstars:
                    newtext += '%s\r\n' % element.strip()
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside,
                                                     site)
        else:
            # Adding the text
            newtext += u"\n%s" % addText
    # If instead the text must be added above...
    else:
        newtext = addText + '\n' + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                         page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If this is loaded as a module, it may not be useful to actually
        # save the text to the page
        if putText:
            if not always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All', 'open in Browser'],
                    ['y', 'N', 'a', 'b'], 'N')
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    webbrowser.open(
                        "http://%s%s" %
                        (page.site().hostname(), page.site().nice_get_address(
                            page.title())))
                    pywikibot.input("Press Enter when finished in browser.")
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary)
                    else:
                        page.put_async(newtext, summary)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < 5:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(5)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s' %
                        (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)' %
                                     page.title())
                    return (False, False, always)
                else:
                    # Only consecutive server errors count towards the retry
                    # limit, so reset the counter on success
                    errorCount = 0
                    return (True, True, always)
        else:
            # Loaded as a module with putText=False: hand back the old and
            # new text instead of saving the page
            return (text, newtext, always)
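The heart of the reorderEnabled branch is: pull the category and interwiki links out of the wikitext, append the new text, then put the categories and interwikis back so they stay at the bottom of the page. Below is a simplified, self-contained sketch of that reordering with plain regular expressions; the real script uses pywikibot's site-aware helpers such as getCategoryLinks and replaceLanguageLinks, so the patterns here are only rough approximations.

import re

# Rough matchers; the real helpers know the site's namespaces and language codes.
CATEGORY_RE = re.compile(r'\[\[Category:[^\]]+\]\]\n?')
INTERWIKI_RE = re.compile(r'\[\[[a-z]{2,3}:[^\]]+\]\]\n?')


def add_text_reordered(text, addition):
    """Append `addition` while keeping categories and interwikis at the bottom."""
    categories = CATEGORY_RE.findall(text)
    interwikis = INTERWIKI_RE.findall(text)
    body = INTERWIKI_RE.sub('', CATEGORY_RE.sub('', text)).rstrip()
    parts = [body, addition]
    parts += [c.strip() for c in categories]
    parts += [i.strip() for i in interwikis]
    return '\n'.join(parts) + '\n'


if __name__ == '__main__':
    sample = (u"Some article text.\n"
              u"[[Category:Examples]]\n"
              u"[[de:Beispiel]]\n")
    print(add_text_reordered(sample, u"{{stub}}"))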
Esempio n. 23
0
def add_text(page=None, addText=None, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    if not addText:
        raise NoEnoughData('You have to specify what text you want to add!')
    if not summary:
        summary = i18n.twtranslate(pywikibot.getSite(), 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. The templates that produce those stars are matched
    # against starsList, which this version expects to be defined elsewhere
    # in the script (see the full list in the previous example).

    errorCount = 0
    site = pywikibot.getSite()
    pathWiki = site.family.nicepath(site.lang)

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Decide whether the bot has to skip the page; this is what lets you
    # use both -except and -excepturl
    if regexSkipUrl is not None:
        url = '%s%s' % (pathWiki, page.urlname())
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -exceptUrl is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -except is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)
    # If not up, the text is appended at the bottom
    if not up:
        newtext = text
        # Translate the literal '\\n' sequence into a real line separator
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Put the categories back
            newtext = pywikibot.replaceCategoryLinks(newtext,
                                                     categoriesInside, site,
                                                     True)
            # Deal with the "star" templates
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside,
                                                     site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If this is loaded as a module, it may not be useful to actually
        # save the text to the page
        if putText:
            if not always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All', 'open in Browser'],
                    ['y', 'n', 'a', 'b'], 'n')
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site.hostname(),
                        page.site.nice_get_address(page.title())
                    ))
                    pywikibot.input("Press Enter when finished in browser.")
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.maxretries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(5)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                else:
                    # Only consecutive server errors count towards the retry
                    # limit, so reset the counter on success
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
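Both versions also take the "star" templates (the {{Link FA|xx}}-style markers matched by starsList) out of the text, sort them, and re-append them just before the interwiki links; and when called with putText=False the function hands back the old and new text instead of saving. The following standalone sketch illustrates only the star-collection step, with a tiny hard-coded pattern list standing in for the full starsList.

import re

# A tiny illustrative subset of the star patterns; the real scripts use the
# much longer starsList shown above.
STAR_PATTERNS = [u'link[ _]fa', u'link[ _]ga']


def collect_stars(text):
    """Strip star templates from text; return (stripped text, sorted templates)."""
    allstars = []
    for star in STAR_PATTERNS:
        regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
        found = regex.findall(text)
        if found:
            text = regex.sub('', text)
            allstars += found
    allstars.sort()
    return text, [s.strip() for s in allstars]


if __name__ == '__main__':
    sample = u"Article body.\n{{Link GA|de}}\n{{Link FA|en}}\n[[en:Example]]\n"
    body, stars = collect_stars(sample)
    print(body + '\n'.join(stars))

Sorting the collected templates keeps them in a stable order regardless of where they originally appeared on the page.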