Example #1
0
 def fixSyntaxSave(self, text):
     """Repair common external-link syntax mistakes in wiki markup."""
     exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
     # Each entry pairs a broken-link pattern with its corrected form;
     # they are applied in order.
     repairs = [
         # external link wrongly wrapped in double brackets
         (r'\[\[(?P<url>https?://[^\]]+?)\]\]', r'[\g<url>]'),
         # external link opened with a double bracket
         (r'\[\[(?P<url>https?://.+?)\]', r'[\g<url>]'),
         # URL and label split by a pipe preceded by whitespace, so the
         # pipe is clearly not a legitimate part of the URL
         (r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
          r'[\g<url> \g<label>]'),
         # pipe inside the link where a known file extension marks the
         # true end of the URL; misfires are very unlikely here
         (r'\[(?P<url>https?://[^\|\] ]+?(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp)) *\| *(?P<label>[^\|\]]+?)\]',
          r'[\g<url> \g<label>]'),
     ]
     for broken, fixed in repairs:
         text = pywikibot.replaceExcept(text, broken, fixed, exceptions)
     return text
Example #2
0
    def fixSyntaxSave(self, text):
        """Repair common external-link syntax mistakes in wiki markup."""
        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                      'startspace']
        # TODO: a rule turning external self-links into wiki links used to
        # live here; it stays disabled until difflinks and titled links
        # (e.g. /w/index.php?title=...&diff=...&oldid=...) are excluded.
        repairs = [
            # external link wrongly wrapped in double brackets
            (r'\[\[(?P<url>https?://[^\]]+?)\]\]', r'[\g<url>]'),
            # external link opened with a double bracket
            (r'\[\[(?P<url>https?://.+?)\]', r'[\g<url>]'),
            # URL and label split by a pipe preceded by whitespace, so the
            # pipe is clearly not a legitimate part of the URL
            (r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
             r'[\g<url> \g<label>]'),
            # pipe inside the link where a known file extension marks the
            # true end of the URL; misfires are very unlikely here
            (r'\[(?P<url>https?://[^\|\] ]+?(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp)) *\| *(?P<label>[^\|\]]+?)\]',
             r'[\g<url> \g<label>]'),
        ]
        for broken, fixed in repairs:
            text = pywikibot.replaceExcept(text, broken, fixed, exceptions)
        return text
Example #3
0
 def removeUselessSpaces(self, text):
     """Collapse runs of spaces and strip spaces at line ends."""
     # Regions where whitespace is significant or pre-formatted are left
     # untouched.
     exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', 'table',
                   'template']
     # squeeze two or more consecutive spaces down to a single one
     text = pywikibot.replaceExcept(text, re.compile('  +'), ' ', exceptions)
     # drop any space left dangling at the end of a line
     text = pywikibot.replaceExcept(text, re.compile(' $'), '', exceptions)
     return text
 def fixArabicLetters(self, text):
     """Normalize Arabic-script punctuation, letter variants and digits.

     Only acts on the Sorani (ckb) and Persian (fa) wikis; text for any
     other site language is returned unchanged.
     """
     if self.site.lang=='ckb' or self.site.lang=='fa':
         exceptions = [
             'gallery',
             'hyperlink',
             'interwiki',
             # but changes letters inside wikilinks
             #'link',
             'math',
             'pre',
             'template',
             'timeline',
             'ref',
             'source',
             'startspace',
             'inputbox',
         ]
         # do not change inside file links
         namespaces = list(self.site.namespace(6, all = True))
         pattern = re.compile(u'\[\[(' + '|'.join(namespaces) + '):.+?\..+?\]\]',
                              re.UNICODE)
         exceptions.append(pattern)
         # Latin comma -> Arabic comma
         text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
         if self.site.lang=='ckb':
             # ckb only: normalize heh letter variants to the local forms
             text = pywikibot.replaceExcept(text,
                                            ur'ه([.،_<\]\s])',
                                            ur'ە\1', exceptions)
             text = pywikibot.replaceExcept(text, u'ه‌', u'ە', exceptions)
             text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
         # normalize kaf and yeh variants
         text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
         text = pywikibot.replaceExcept(text, ur'[ىي]', u'ی', exceptions)
         # replace persian digits
         for i in range(0,10):
             if self.site.lang=='ckb':
                 text = pywikibot.replaceExcept(text,
                                                u'۰۱۲۳۴۵۶۷۸۹'[i],
                                                u'٠١٢٣٤٥٦٧٨٩'[i], exceptions)
             else:
                 text = pywikibot.replaceExcept(text,
                                                u'٠١٢٣٤٥٦٧٨٩'[i],
                                                u'۰۱۲۳۴۵۶۷۸۹'[i], exceptions)
         # do not change digits in class, style and table params
         pattern = re.compile(u'=".*?"', re.UNICODE)
         exceptions.append(pattern)
         # do not change digits inside html-tags
         pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
         exceptions.append(pattern)
         exceptions.append('table') #exclude tables for now
         # finally convert ASCII digits to the local digit set
         for i in range(0,10):
             if self.site.lang=='ckb':
                 text = pywikibot.replaceExcept(text, str(i),
                                                u'٠١٢٣٤٥٦٧٨٩'[i], exceptions)
             else:
                 text = pywikibot.replaceExcept(text, str(i),
                                                u'۰۱۲۳۴۵۶۷۸۹'[i], exceptions)
     return text
    def commonsfiledesc(self, text):
        """Standardize Commons file description pages.

        Replaces literal section headers and frequent field values with
        their {{int:}} template equivalents and removes leftovers from
        transwiki/upload tooling.
        """
        exceptions = ["comment", "includeonly", "math", "noinclude", "nowiki",
                      "pre", "source", "ref", "timeline"]
        # (regex, replacement, exception tags) triples, applied in order;
        # every substitution runs case-insensitively (trailing True).
        rules = [
            # section headers -> {{int:}} versions
            (r"([\r\n]|^)\=\= *Summary *\=\=",
             r"\1== {{int:filedesc}} ==", exceptions),
            (r"([\r\n])\=\= *\[\[Commons:Copyright tags\|Licensing\]\]: *\=\=",
             r"\1== {{int:license}} ==", exceptions),
            (r"([\r\n])\=\= *(Licensing|License information|{{int:license-header}}) *\=\=",
             r"\1== {{int:license}} ==", exceptions),
            # frequent field values -> {{int:}} versions
            (r"([\r\n]\|[Ss]ource *\= *)(?:[Oo]wn work by uploader|[Oo]wn work|[Ee]igene [Aa]rbeit) *([\r\n])",
             r"\1{{own}}\2", exceptions),
            (r"(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])",
             r"\1\2", exceptions),
            # added to transwikied pages
            (r"__NOTOC__", "", exceptions),
            # tracker element for the js upload form; exceptions[1:] drops
            # the 'comment' tag so the HTML comment itself can be matched
            (r"<!-- *{{ImageUpload\|(?:full|basic)}} *-->", "",
             exceptions[1:]),
            (r"{{ImageUpload\|(?:basic|full)}}", "", exceptions),
            # collapse duplicated section headers
            (r"([\r\n]|^)\=\= *{{int:filedesc}} *\=\=(?:[\r\n ]*)\=\= *{{int:filedesc}} *\=\=",
             r"\1== {{int:filedesc}} ==", exceptions),
            (r"([\r\n]|^)\=\= *{{int:license}} *\=\=(?:[\r\n ]*)\=\= *{{int:license}} *\=\=",
             r"\1== {{int:license}} ==", exceptions),
        ]
        for old, new, excs in rules:
            text = pywikibot.replaceExcept(text, old, new, excs, True)
        return text
    def removeUselessSpaces(self, text):
        """Collapse multiple spaces into one and strip trailing spaces.

        The unused local ``result = []`` from the original was removed.
        """
        multipleSpacesR = re.compile("  +")
        spaceAtLineEndR = re.compile(" $")

        # Regions where whitespace is significant or pre-formatted are
        # left untouched.
        exceptions = ["comment", "math", "nowiki", "pre", "startspace", "table", "template"]
        text = pywikibot.replaceExcept(text, multipleSpacesR, " ", exceptions)
        text = pywikibot.replaceExcept(text, spaceAtLineEndR, "", exceptions)

        return text
Example #7
0
    def fixReferences(self, text):
        """Clean up <ref> tag syntax.

        Patterns adapted from AnomieBOT's OrphanReferenceFixer:
        http://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFixer.pm
        """
        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                      'startspace']

        # normalize 'name   ="' / 'name=   "' to the canonical 'name="'
        text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text)
        # drop refs that carry no content at all
        text = pywikibot.replaceExcept(
            text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', r'', exceptions)
        # turn an empty named ref pair into the self-closing form
        text = pywikibot.replaceExcept(
            text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>',
            exceptions)
        return text
Example #8
0
    def fixReferences(self, text):
        """Normalize <ref> markup: fix name= spacing, drop empty refs.

        Patterns adapted from AnomieBOT's OrphanReferenceFixer:
        http://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFixer.pm
        """
        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']

        # it should be name = " or name=" NOT name   ="
        # NOTE(review): plain re.sub here, so unlike the calls below this
        # fix also applies inside the exception regions — confirm intended.
        text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text)
        #remove empty <ref/>-tag
        text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', r'', exceptions)
        # collapse an empty <ref ...></ref> pair into a self-closing tag
        text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>', exceptions)
        return text
Example #9
0
 def fixTypo(self, text):
     """Fix unit typos: '<n> ccm' -> '<n>&nbsp;cm³' and the ordinal sign
     º before C/F -> the degree sign °."""
     exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace', 'gallery', 'hyperlink', 'interwiki', 'link']
     # change <number> ccm -> <number> cm³
     text = pywikibot.replaceExcept(text, ur'(\d)\s*&nbsp;ccm', ur'\1&nbsp;cm³', exceptions)
     text = pywikibot.replaceExcept(text, ur'(\d)\s*ccm', ur'\1&nbsp;cm³', exceptions)
     # Solve wrong Nº sign with °C or °F
     # additional exception requested on fr-wiki for this stuff:
     # leave anything inside French guillemets « ... » untouched
     pattern = re.compile(u'«.*?»', re.UNICODE)
     exceptions.append(pattern)
     text = pywikibot.replaceExcept(text, ur'(\d)\s*&nbsp;[º°]([CF])', ur'\1&nbsp;°\2', exceptions)
     text = pywikibot.replaceExcept(text, ur'(\d)\s*[º°]([CF])', ur'\1&nbsp;°\2', exceptions)
     text = pywikibot.replaceExcept(text, ur'º([CF])', ur'°\1', exceptions)
     return text
Example #10
0
 def fixHtml(self, text):
     """Replace simple HTML markup with equivalent wiki syntax."""
     # All patterns are case-insensitive via (?i).  MediaWiki itself
     # rewrites <br> to <br />, so that tag needs no handling here.
     exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
     for html, wiki in [
             # bold / italic tags -> wiki quoting
             (r'(?i)<b>(.*?)</b>', r"'''\1'''"),
             (r'(?i)<strong>(.*?)</strong>', r"'''\1'''"),
             (r'(?i)<i>(.*?)</i>', r"''\1''"),
             (r'(?i)<em>(.*?)</em>', r"''\1''"),
             # bare horizontal line alone on its line
             (r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2'),
             # <hr> with attributes has no wiki equivalent; just close the
             # tag so it is XHTML compliant
             (r'(?i)<hr ([^>/]+?)>', r'<hr \1 />'),
     ]:
         text = pywikibot.replaceExcept(text, html, wiki, exceptions)
     # header tags whose line holds nothing but the header itself
     for level in range(1, 7):
         marker = '=' * level
         text = pywikibot.replaceExcept(
             text,
             r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])' % (level,
                                                                   level),
             r'\1%s \2 %s\3' % (marker, marker), exceptions)
     # strip refs that carry no content
     text = pywikibot.replaceExcept(text, r'(?i)<ref\s*/>', r'', exceptions)
     # TODO: maybe we can make the bot replace <p> tags with \r\n's.
     return text
Example #11
0
    def commonsfiledesc(self, text):
        """Standardize Commons file description pages.

        Replaces literal section headers and frequent field values with
        their {{int:}} template equivalents; all substitutions run
        case-insensitively (trailing True argument).
        """
        # section headers to {{int:}} versions
        exceptions = [
            'comment', 'includeonly', 'math', 'noinclude', 'nowiki', 'pre',
            'source', 'ref', 'timeline'
        ]
        text = pywikibot.replaceExcept(text, r"([\r\n]|^)\=\= *Summary *\=\=",
                                       r"\1== {{int:filedesc}} ==", exceptions,
                                       True)
        text = pywikibot.replaceExcept(
            text,
            r"([\r\n])\=\= *\[\[Commons:Copyright tags\|Licensing\]\]: *\=\=",
            r"\1== {{int:license}} ==", exceptions, True)
        text = pywikibot.replaceExcept(
            text,
            r"([\r\n])\=\= *(Licensing|License information|{{int:license-header}}) *\=\=",
            r"\1== {{int:license}} ==", exceptions, True)

        # frequent field values to {{int:}} versions
        text = pywikibot.replaceExcept(
            text,
            r'([\r\n]\|[Ss]ource *\= *)(?:[Oo]wn work by uploader|[Oo]wn work|[Ee]igene [Aa]rbeit) *([\r\n])',
            r'\1{{own}}\2', exceptions, True)
        text = pywikibot.replaceExcept(
            text,
            r'(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
            r'\1\2', exceptions, True)

        # added to transwikied pages
        text = pywikibot.replaceExcept(text, r'__NOTOC__', '', exceptions,
                                       True)

        # tracker element for js upload form; exceptions[1:] drops the
        # 'comment' tag so the HTML comment itself can be matched
        text = pywikibot.replaceExcept(
            text, r'<!-- *{{ImageUpload\|(?:full|basic)}} *-->', '',
            exceptions[1:], True)
        text = pywikibot.replaceExcept(text,
                                       r'{{ImageUpload\|(?:basic|full)}}', '',
                                       exceptions, True)

        # duplicated section headers
        text = pywikibot.replaceExcept(
            text,
            r'([\r\n]|^)\=\= *{{int:filedesc}} *\=\=(?:[\r\n ]*)\=\= *{{int:filedesc}} *\=\=',
            r'\1== {{int:filedesc}} ==', exceptions, True)
        text = pywikibot.replaceExcept(
            text,
            r'([\r\n]|^)\=\= *{{int:license}} *\=\=(?:[\r\n ]*)\=\= *{{int:license}} *\=\=',
            r'\1== {{int:license}} ==', exceptions, True)
        return text
Example #12
0
    def commonsfiledesc(self, text):
        """Standardize Commons file description pages.

        Variant that maps license headers to {{int:license-header}}
        (not {{int:license}}); all substitutions run case-insensitively
        (trailing True argument).
        """
        # section headers to {{int:}} versions
        exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki',
                      'pre', 'source', 'ref', 'timeline']
        text = pywikibot.replaceExcept(text,
                                       r"([\r\n]|^)\=\= *Summary *\=\=",
                                       r"\1== {{int:filedesc}} ==",
                                       exceptions, True)
        text = pywikibot.replaceExcept(
            text,
            r"([\r\n])\=\= *\[\[Commons:Copyright tags\|Licensing\]\]: *\=\=",
            r"\1== {{int:license-header}} ==", exceptions, True)
        text = pywikibot.replaceExcept(
            text,
            r"([\r\n])\=\= *(Licensing|License information|{{int:license}}) *\=\=",
            r"\1== {{int:license-header}} ==", exceptions, True)

        # frequent field values to {{int:}} versions
        text = pywikibot.replaceExcept(
            text,
            r'([\r\n]\|[Ss]ource *\= *)(?:[Oo]wn work by uploader|[Oo]wn work|[Ee]igene [Aa]rbeit) *([\r\n])',
            r'\1{{own}}\2', exceptions, True)
        text = pywikibot.replaceExcept(
            text,
            r'(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
            r'\1\2', exceptions, True)

        # added to transwikied pages
        text = pywikibot.replaceExcept(text, r'__NOTOC__', '', exceptions, True)

        # tracker element for js upload form; exceptions[1:] drops the
        # 'comment' tag so the HTML comment itself can be matched
        text = pywikibot.replaceExcept(
            text,
            r'<!-- *{{ImageUpload\|(?:full|basic)}} *-->',
            '', exceptions[1:], True)
        text = pywikibot.replaceExcept(text, r'{{ImageUpload\|(?:basic|full)}}',
                                       '', exceptions, True)

        # duplicated section headers
        text = pywikibot.replaceExcept(
            text,
            r'([\r\n]|^)\=\= *{{int:filedesc}} *\=\=(?:[\r\n ]*)\=\= *{{int:filedesc}} *\=\=',
            r'\1== {{int:filedesc}} ==', exceptions, True)
        text = pywikibot.replaceExcept(
            text,
            r'([\r\n]|^)\=\= *{{int:license-header}} *\=\=(?:[\r\n ]*)\=\= *{{int:license-header}} *\=\=',
            r'\1== {{int:license-header}} ==', exceptions, True)

        return text
Example #13
0
 def fixSyntaxSave(self, text):
     """Repair common external-link syntax mistakes in wiki markup."""
     skip = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
     # double square brackets around an external link
     text = pywikibot.replaceExcept(
         text, r'\[\[(?P<url>https?://[^\]]+?)\]\]', r'[\g<url>]', skip)
     # double opening bracket before an external link
     text = pywikibot.replaceExcept(
         text, r'\[\[(?P<url>https?://.+?)\]', r'[\g<url>]', skip)
     # URL and label separated by a pipe preceded by whitespace, making
     # clear the pipe is not part of the URL
     text = pywikibot.replaceExcept(
         text,
         r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
         r'[\g<url> \g<label>]', skip)
     # pipe inside the link where a known file extension marks the real
     # end of the URL; misfires are very unlikely
     text = pywikibot.replaceExcept(
         text,
         r'\[(?P<url>https?://[^\|\] ]+?(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp)) *\| *(?P<label>[^\|\]]+?)\]',
         r'[\g<url> \g<label>]', skip)
     return text
def sort_by_country_subcat(subcat, subject):
    print subcat
    subcat = subcat.replace("_", " ")
    subject = subject.replace("_", " ")
    if subcat.startswith(subject):
        temp1 = subcat[len(subject) :].lstrip()
        if temp1.startswith("from"):
            temp2 = temp1[len("from") :].lstrip()
        elif temp1.startswith("of"):
            temp2 = temp1[len("of") :].lstrip()
        elif temp1.startswith("in"):
            temp2 = temp1[len("in") :].lstrip()
        else:
            temp2 = ""
        if temp2:
            if temp2.startswith("the"):
                country = temp2[len("the") :].lstrip()
            else:
                country = temp2
            page = wikipedia.Page(wikipedia.getSite(), "Category:" + subcat)
            old = u"\[\[[cC]ategory:" + subject + u" by country[^\]]*\]\]"
            new = u"[[Category:" + subject + u" by country|" + country + u"]]"
            comment = u"Sorting [[:Category:" + subject + u" by country]]"
            newtext = wikipedia.replaceExcept(page.get(), old, new, [])
            wikipedia.showDiff(page.get(), newtext)
            page.put(newtext, comment)
Example #15
0
 def translateMagicWords(self, text):
     """
     Use localized magic words for image parameters (thumb, left, ...).

     NOTE(review): the original docstring said "localized namespace
     names", which looks like a copy-paste from
     translateAndCapitalizeNamespaces — the loop below only handles
     img_* magic words.
     """
     # not wanted at ru
     # arz uses english stylish codes
     if self.site.lang not in ['arz', 'ru']:
         exceptions = ['nowiki', 'comment', 'math', 'pre']
         for magicWord in [
                 'img_thumbnail',
                 'img_left',
                 'img_center',
                 'img_right',
                 'img_none',
                 'img_framed',
                 'img_frameless',
                 'img_border',
                 'img_upright',
         ]:
             aliases = self.site.siteinfo('magicwords').get(magicWord)
             if not aliases:
                 continue
             # replace any listed alias after the file name with the
             # canonical (first) alias
             text = pywikibot.replaceExcept(
                 text, r'\[\[(?P<left>.+?:.+?\..+?\|) *(' +
                 '|'.join(aliases) + ') *(?P<right>(\|.*?)?\]\])',
                 r'[[\g<left>' + aliases[0] + '\g<right>', exceptions)
     return text
Example #16
0
 def removeDeprecatedTemplates(self, text):
     """Strip deprecated templates configured for this site from the text.

     Looks up deprecatedTemplates[family][lang]; for each entry removes
     every {{template|...}} (or {{msg:template|...}}) occurrence.

     Fixed: dict.has_key() is deprecated — replaced with the 'in'
     membership test (identical behavior).
     """
     family = self.site.family.name
     lang = self.site.lang
     if family in deprecatedTemplates and lang in deprecatedTemplates[family]:
         for template in deprecatedTemplates[family][lang]:
             if not self.site.nocapitalize:
                 # match both capitalizations of the first letter
                 template = ('[' + template[0].upper() + template[0].lower()
                             + ']' + template[1:])
             text = wikipedia.replaceExcept(
                 text,
                 r'\{\{([mM][sS][gG]:)?' + template + '(?P<parameters>\|[^}]+|)}}',
                 '', ['comment', 'math', 'nowiki', 'pre'])
     return text
Example #17
0
def sort_by_country_subcat(subcat, subject):
    """Re-sort a subcategory under [[Category:<subject> by country]].

    Derives the country name from the subcategory title (patterns like
    "<subject> from/of/in [the] <country>") and rewrites the category
    sort key accordingly.  Does nothing if no country can be derived.
    """
    print subcat
    subcat = subcat.replace('_', ' ')
    subject = subject.replace('_', ' ')
    if subcat.startswith(subject):
        # strip the linking word between subject and country
        temp1 = subcat[len(subject):].lstrip()
        if temp1.startswith('from'):
            temp2 = temp1[len('from'):].lstrip()
        elif temp1.startswith('of'):
            temp2 = temp1[len('of'):].lstrip()
        elif temp1.startswith('in'):
            temp2 = temp1[len('in'):].lstrip()
        else:
            temp2 = ''
        if temp2:
            # drop a leading article
            if temp2.startswith('the'):
                country = temp2[len('the'):].lstrip()
            else:
                country = temp2
            page = wikipedia.Page(wikipedia.getSite(), 'Category:' + subcat)
            old = u'\[\[[cC]ategory:' + subject + u' by country[^\]]*\]\]'
            new = u'[[Category:' + subject + u' by country|' + country + u']]'
            comment = u'Sorting [[:Category:' + subject + u' by country]]'
            newtext = wikipedia.replaceExcept(page.get(), old, new, [])
            wikipedia.showDiff(page.get(), newtext)
            page.put(newtext, comment)
def sort_by_country_subcat(subcat, subject):
    print subcat
    subcat = subcat.replace('_', ' ')
    subject = subject.replace('_', ' ')
    if subcat.startswith(subject):
	temp1 = subcat[len(subject):].lstrip()
	if temp1.startswith('from'):
	    temp2 = temp1[len('from'):].lstrip()
	elif temp1.startswith('of'):
            temp2 = temp1[len('of'):].lstrip()
        elif temp1.startswith('in'):
            temp2 = temp1[len('in'):].lstrip()
	else:
	    temp2 = ''
	if temp2:
	    if temp2.startswith('the'):
		country = temp2[len('the'):].lstrip() 
	    else:
		country = temp2
	    page = wikipedia.Page(wikipedia.getSite(), 'Category:' + subcat)
	    old = u'\[\[[cC]ategory:' + subject + u' by country[^\]]*\]\]'
	    new = u'[[Category:' + subject + u' by country|' + country + u']]'
	    comment = u'Sorting [[:Category:' + subject + u' by country]]'
	    newtext = wikipedia.replaceExcept(page.get(), old, new, [])
	    wikipedia.showDiff(page.get(), newtext)
	    page.put(newtext, comment)
Example #19
0
    def translateAndCapitalizeNamespaces(self, text):
        """Rewrite links so they use the site's canonical namespace names."""
        # arz uses english stylish codes
        if self.site.sitename() == 'wikipedia:arz':
            return text
        family = self.site.family
        # wiki links aren't parsed inside these regions
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for nsNumber in family.namespaces:
            # skip namespaces with no definition for this language
            if not family.isDefinedNSLanguage(nsNumber, self.site.lang):
                continue
            aliases = list(self.site.namespace(nsNumber, all=True))
            canonical = aliases.pop(0)
            if nsNumber == 6 and family.name == 'wikipedia' and \
               self.site.lang in ('en', 'fr'):
                # "Image" is deliberately kept on en-wiki and fr-wiki
                for alias in (u'Image', u'image'):
                    if alias in aliases:
                        aliases.remove(alias)
            # skip the main (article) namespace, whose name is empty
            if canonical and aliases:
                text = pywikibot.replaceExcept(
                    text,
                    r'\[\[\s*(' + '|'.join(aliases) +
                    ') *:(?P<nameAndLabel>.*?)\]\]',
                    r'[[' + canonical + ':\g<nameAndLabel>]]', exceptions)
        return text
Example #20
0
    def translateAndCapitalizeNamespaces(self, text):
        """
        Makes sure that localized namespace names are used.

        Rewrites [[Alias:...]] links to use the canonical (first-listed)
        namespace name for each namespace defined for this language.
        """
        # arz uses english stylish codes
        if self.site.sitename() == 'wikipedia:arz':
            return text
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for nsNumber in family.namespaces:
            if not family.isDefinedNSLanguage(nsNumber, self.site.lang):
                # Skip undefined namespaces
                continue
            namespaces = list(self.site.namespace(nsNumber, all=True))
            # first entry is the canonical name for this site
            thisNs = namespaces.pop(0)
            if nsNumber == 6 and family.name == 'wikipedia' and \
               self.site.lang in ('en', 'fr'):
                # do not change "Image" on en-wiki and fr-wiki
                for image in [u'Image', u'image']:
                    if image in namespaces:
                        namespaces.remove(image)
            # skip main (article) namespace
            if thisNs and namespaces:
                text = pywikibot.replaceExcept(
                    text, r'\[\[\s*(' + '|'.join(namespaces) +
                    ') *:(?P<nameAndLabel>.*?)\]\]',
                    r'[[' + thisNs + ':\g<nameAndLabel>]]', exceptions)
        return text
    def translateAndCapitalizeNamespaces(self, text):
        """
        Makes sure that localized namespace names are used.

        Rewrites [[Alias:...]] links to use the canonical (first-listed)
        namespace name for each namespace defined for this language.
        """
        # arz uses english stylish codes
        if self.site.sitename() == "wikipedia:arz":
            return text
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ["nowiki", "comment", "math", "pre"]

        for nsNumber in family.namespaces:
            if not family.isDefinedNSLanguage(nsNumber, self.site.lang):
                # Skip undefined namespaces
                continue
            namespaces = list(self.site.namespace(nsNumber, all=True))
            # first entry is the canonical name for this site
            thisNs = namespaces.pop(0)
            if nsNumber == 6 and family.name == "wikipedia" and self.site.lang in ("en", "fr"):
                # do not change "Image" on en-wiki and fr-wiki
                for image in [u"Image", u"image"]:
                    if image in namespaces:
                        namespaces.remove(image)
            # skip main (article) namespace
            if thisNs and namespaces:
                text = pywikibot.replaceExcept(
                    text,
                    r"\[\[\s*(" + "|".join(namespaces) + ") *:(?P<nameAndLabel>.*?)\]\]",
                    r"[[" + thisNs + ":\g<nameAndLabel>]]",
                    exceptions,
                )
        return text
 def fixArabicLetters(self, text):
     """Normalize Arabic-script punctuation, letter variants and digits.

     Only acts on the Sorani (ckb) and Persian (fa) wikis; text for any
     other site language is returned unchanged.
     """
     if self.site.lang == "ckb" or self.site.lang == "fa":
         exceptions = [
             "gallery",
             "hyperlink",
             "interwiki",
             # but changes letters inside wikilinks
             #'link',
             "math",
             "pre",
             "template",
             "timeline",
             "ref",
             "source",
             "startspace",
             "inputbox",
         ]
         # do not change inside file links
         namespaces = list(self.site.namespace(6, all=True))
         pattern = re.compile(u"\[\[(" + "|".join(namespaces) + "):.+?\..+?\]\]", re.UNICODE)
         exceptions.append(pattern)
         # Latin comma -> Arabic comma
         text = pywikibot.replaceExcept(text, u",", u"،", exceptions)
         if self.site.lang == "ckb":
             # ckb only: normalize heh letter variants to the local forms
             text = pywikibot.replaceExcept(text, ur"ه([.،_<\]\s])", ur"ە\1", exceptions)
             text = pywikibot.replaceExcept(text, u"ه‌", u"ە", exceptions)
             text = pywikibot.replaceExcept(text, u"ه", u"ھ", exceptions)
         # normalize kaf and yeh variants
         text = pywikibot.replaceExcept(text, u"ك", u"ک", exceptions)
         text = pywikibot.replaceExcept(text, ur"[ىي]", u"ی", exceptions)
         # replace persian digits
         for i in range(0, 10):
             if self.site.lang == "ckb":
                 text = pywikibot.replaceExcept(text, u"۰۱۲۳۴۵۶۷۸۹"[i], u"٠١٢٣٤٥٦٧٨٩"[i], exceptions)
             else:
                 text = pywikibot.replaceExcept(text, u"٠١٢٣٤٥٦٧٨٩"[i], u"۰۱۲۳۴۵۶۷۸۹"[i], exceptions)
         # do not change digits in class, style and table params
         pattern = re.compile(u'=".*?"', re.UNICODE)
         exceptions.append(pattern)
         # do not change digits inside html-tags
         pattern = re.compile(u"<[/]*?[^</]+?[/]*?>", re.UNICODE)
         exceptions.append(pattern)
         exceptions.append("table")  # exclude tables for now
         # finally convert ASCII digits to the local digit set
         for i in range(0, 10):
             if self.site.lang == "ckb":
                 text = pywikibot.replaceExcept(text, str(i), u"٠١٢٣٤٥٦٧٨٩"[i], exceptions)
             else:
                 text = pywikibot.replaceExcept(text, str(i), u"۰۱۲۳۴۵۶۷۸۹"[i], exceptions)
     return text
 def fixArabicLetters(self, text):
     """Normalize Arabic-script punctuation, letter variants and digits
     for the fa and ckb wikis.

     NOTE(review): unlike other variants of this method there is no
     language guard here — digits.pop(self.site.lang) raises KeyError
     for any site language other than 'ckb'/'fa'.  Presumably callers
     only invoke this on those sites; confirm before reuse.
     """
     exceptions = [
         'gallery',
         'hyperlink',
         'interwiki',
         # but changes letters inside wikilinks
         #'link',
         'math',
         'pre',
         'template',
         'timeline',
         'ref',
         'source',
         'startspace',
         'inputbox',
     ]
     # valid digits
     digits = {
         'ckb' : u'٠١٢٣٤٥٦٧٨٩',
         'fa'  : u'۰۱۲۳۴۵۶۷۸۹'
     }
     # new = the local digit set, old = the other language's set.
     new = digits.pop(self.site.lang)
     # This only works if there are only two items in digits dict
     old = digits[digits.keys()[0]]
     # do not change inside file links
     namespaces = list(self.site.namespace(6, all=True))
     pattern = re.compile(u'\[\[(' + '|'.join(namespaces) + '):.+?\.\w+? *(\|((\[\[.*?\]\])|.)*)?\]\]',
                          re.UNICODE)
     exceptions.append(pattern)
     # Latin comma -> Arabic comma
     text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
     if self.site.lang=='ckb':
         # ckb only: normalize heh letter variants to the local forms
         text = pywikibot.replaceExcept(text,
                                        ur'ه([.،_<\]\s])',
                                        ur'ە\1', exceptions)
         text = pywikibot.replaceExcept(text, u'ه‌', u'ە', exceptions)
         text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
     # normalize kaf and yeh variants
     text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
     text = pywikibot.replaceExcept(text, ur'[ىي]', u'ی', exceptions)
     # replace persian/arabic digits
     for i in xrange(0, 10):
         text = pywikibot.replaceExcept(text, old[i], new[i], exceptions)
     # do not change digits in class, style and table params
     pattern = re.compile(u'\w+=(".+?"|\d+)', re.UNICODE)
     exceptions.append(pattern)
     # do not change digits inside html-tags
     pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
     exceptions.append(pattern)
     exceptions.append('table') #exclude tables for now
     # replace digits
     for i in xrange(0, 10):
         text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)
     ##fixing pipe and trailing for fa. Thanks ZxxZxxZ
     if self.site.lang=='fa':
         faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + u'ًٌٍَُِّْٓٔ'
         text = re.sub(u'\[\[([^\]\|]*)]]([‌%s]+)' % faChrs, ur'[[\1|\1\2]]', text)
         text = re.sub(u'\[\[([^\]\|]*)\|(.+?)]]([‌%s]+)' % faChrs, ur'[[\1|\2\3]]', text)
     return text
Example #24
0
 def fixArabicLetters(self, text):
     """Localize punctuation, Arabic-script letters and digits for ckb/fa.

     Only runs for the Sorani (ckb) and Persian (fa) wikis.  Replaces the
     Latin comma with the Arabic comma, normalizes letter variants to the
     locally preferred forms, and converts digits to the local digit set.
     Content inside the listed markup contexts is left untouched.
     Returns the (possibly) modified text.
     """
     if self.site.lang == 'ckb' or self.site.lang == 'fa':
         # markup contexts whose content must never be altered
         exceptions = [
             'gallery',
             'hyperlink',
             'interwiki',
             # but changes letters inside wikilinks
             #'link',
             'math',
             'pre',
             'template',
             'timeline',
             'ref',
             'source',
             'startspace',
             'inputbox',
         ]
         # do not change inside file links
         namespaces = list(self.site.namespace(6, all=True))
         pattern = re.compile(
             u'\[\[(' + '|'.join(namespaces) + '):.+?\..+?\]\]', re.UNICODE)
         exceptions.append(pattern)
         # Latin comma -> Arabic comma
         text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
         if self.site.lang == 'ckb':
             # heh (U+0647) before punctuation/whitespace -> ae (U+06D5)
             text = pywikibot.replaceExcept(text, ur'ه([.،_<\]\s])',
                                            ur'ە\1', exceptions)
             # heh + ZWNJ -> ae; any remaining heh -> heh doachashmee
             text = pywikibot.replaceExcept(text, u'ه‌', u'ە', exceptions)
             text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
         # Arabic kaf (U+0643) -> keheh (U+06A9); yeh variants -> Farsi yeh
         text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
         text = pywikibot.replaceExcept(text, ur'[ىي]', u'ی', exceptions)
         # replace persian digits
         # (for ckb: Persian digits -> Arabic-Indic; for fa: the reverse)
         for i in range(0, 10):
             if self.site.lang == 'ckb':
                 text = pywikibot.replaceExcept(text, u'۰۱۲۳۴۵۶۷۸۹'[i],
                                                u'٠١٢٣٤٥٦٧٨٩'[i],
                                                exceptions)
             else:
                 text = pywikibot.replaceExcept(text, u'٠١٢٣٤٥٦٧٨٩'[i],
                                                u'۰۱۲۳۴۵۶۷۸۹'[i],
                                                exceptions)
         # do not change digits in class, style and table params
         pattern = re.compile(u'=".*?"', re.UNICODE)
         exceptions.append(pattern)
         # do not change digits inside html-tags
         pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
         exceptions.append(pattern)
         exceptions.append('table')  #exclude tables for now
         # Western (ASCII) digits -> local digit set
         for i in range(0, 10):
             if self.site.lang == 'ckb':
                 text = pywikibot.replaceExcept(text, str(i),
                                                u'٠١٢٣٤٥٦٧٨٩'[i],
                                                exceptions)
             else:
                 text = pywikibot.replaceExcept(text, str(i),
                                                u'۰۱۲۳۴۵۶۷۸۹'[i],
                                                exceptions)
     return text
 def fixStyle(self, text):
     exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
     # convert prettytable to wikitable class
     if self.site.language in ('de', 'en'):
         text = pywikibot.replaceExcept(text,
                                        ur'(class="[^"]*)prettytable([^"]*")',
                                        ur'\1wikitable\2', exceptions)
     return text
Example #26
0
    def markActiveTables(self, text):
        """Tag every active <table> / </table> with a ##table## marker.

        Start and end tags that sit inside comments, math, nowiki, pre or
        source sections are left alone; all others are rewritten so that
        later passes only have to look at the marked tags.
        """
        skip = ["comment", "math", "nowiki", "pre", "source"]
        text = wikipedia.replaceExcept(
            text, re.compile("<table", re.IGNORECASE), "<##table##",
            exceptions=skip)
        return wikipedia.replaceExcept(
            text, re.compile("</table>", re.IGNORECASE), "</##table##>",
            exceptions=skip)
Example #27
0
    def markActiveTables(self, text):
        """Rewrite live <table>/</table> tags into ##table## placeholders.

        Tags disabled by nowiki, comments, math, pre or source blocks are
        not touched; subsequent processing then restricts itself to the
        marked forms.
        """
        excluded = ['comment', 'math', 'nowiki', 'pre', 'source']
        for tag, marker in (("<table", "<##table##"),
                            ("</table>", "</##table##>")):
            text = pywikibot.replaceExcept(text,
                                           re.compile(tag, re.IGNORECASE),
                                           marker, exceptions=excluded)
        return text
Example #28
0
    def markActiveTables(self, text):
        """Mark all table tags that are not disabled by protective markup.

        <table> and </table> occurring outside comment, math, nowiki, pre
        and source sections become "<##table##" / "</##table##>" so that we
        later only work on these marked tags.
        """
        protected = ['comment', 'math', 'nowiki', 'pre', 'source']
        start_re = re.compile("<table", re.IGNORECASE)
        end_re = re.compile("</table>", re.IGNORECASE)
        marked = pywikibot.replaceExcept(text, start_re, "<##table##",
                                         exceptions=protected)
        marked = pywikibot.replaceExcept(marked, end_re, "</##table##>",
                                         exceptions=protected)
        return marked
Example #29
0
 def removeNonBreakingSpaceBeforePercent(self, text):
     """Replace 'digit&nbsp;%' by a plain 'digit %'.

     Newer MediaWiki versions insert the non-breaking space in front of a
     percent sign automatically, so the explicit entity is redundant.
     <timeline> content is skipped.
     """
     return pywikibot.replaceExcept(text, r'(\d)&nbsp;%', r'\1 %',
                                    ['timeline'])
 def removeNonBreakingSpaceBeforePercent(self, text):
     """Drop the manual &nbsp; between a digit and a percent sign.

     MediaWiki now renders that non-breaking space by itself, so
     '1&nbsp;%' can safely become '1 %'.  Timeline markup is excluded.
     """
     pattern, repl = r"(\d)&nbsp;%", r"\1 %"
     text = pywikibot.replaceExcept(text, pattern, repl, ["timeline"])
     return text
Example #31
0
 def removeNonBreakingSpaceBeforePercent(self, text):
     """Turn 'digit&nbsp;%' into 'digit %'.

     The explicit non-breaking space is no longer needed because newer
     MediaWiki versions place one before percent signs on their own.
     Only timeline sections are left untouched.
     """
     excluded = ['timeline']
     return pywikibot.replaceExcept(text, r'(\d)&nbsp;%', r'\1 %', excluded)
    def transferImage(self, sourceImagePage, debug=False):
        """Gets a wikilink to an image, downloads it and its description,
           and uploads it to another wikipedia.
           Returns the filename which was used to upload the image
           This function is used by imagetransfer.py and by copy_table.py
        """
        sourceSite = sourceImagePage.site()
        if debug: print "-" * 50
        if debug: print "Found image: %s"% imageTitle
        url = sourceImagePage.fileUrl()
        newname = sourceImagePage.titleWithoutNamespace()
        pywikibot.output(u"URL should be: %s" % url)
        # localize the text that should be printed on the image description page
        try:
            description = sourceImagePage.get()
            #unlink categories
            #description = pywikibot.removeCategoryLinks(description,pywikibot.getSite('commons', 'commons'))
            description = re.sub(u'\[\[Category', u'[[:Category', description, flags=re.IGNORECASE)
            # try to translate license templates
            if (sourceSite.sitename(), self.targetSite.sitename()) in licenseTemplates:
                for old, new in licenseTemplates[(sourceSite.sitename(), self.targetSite.sitename())].iteritems():
                    new = '{{%s}}' % new
                    old = re.compile('{{%s}}' % old)
                    description = pywikibot.replaceExcept(description, old, new,
                                                          ['comment', 'math',
                                                           'nowiki', 'pre'])

            description = pywikibot.translate(self.targetSite, copy_message) \
                          % (sourceSite, description)
            description += '\n\n' + sourceImagePage.getFileVersionHistoryTable()
            # add interwiki link
            if sourceSite.family == self.targetSite.family:
                description += "\r\n\r\n" + sourceImagePage.aslink(forceInterwiki = True)
            #add cat
            description += "\n[[Kategooria:Commonsist kopeeritud pildid]]\n"
        except pywikibot.NoPage:
            description=''
            print "Image does not exist or description page is empty."
        except pywikibot.IsRedirectPage:
            description=''
            print "Image description page is redirect."
        else:
            #bot = UploadRobot(url=self.imagePage.fileUrl(), description=CH, useFilename=self.newname, keepFilename=True, verifyDescription=False, ignoreWarning = True, targetSite = pywikibot.getSite('commons', 'commons'))
            
            bot = upload.UploadRobot(url = url, description = description, useFilename = newname, keepFilename=True, verifyDescription=False, ignoreWarning = False, targetSite = self.targetSite)
            # try to upload
            targetFilename = bot.run()
            if targetFilename and self.targetSite.family.name == 'commons' and self.targetSite.lang == 'commons':
                # upload to Commons was successful
                reason = pywikibot.translate(sourceSite, nowCommonsMessage)
                # try to delete the original image if we have a sysop account
                if sourceSite.family.name in config.sysopnames and sourceSite.lang in config.sysopnames[sourceSite.family.name]:
                    if sourceImagePage.delete(reason):
                        return
                if sourceSite.lang in nowCommonsTemplate and sourceSite.family.name in config.usernames and sourceSite.lang in config.usernames[sourceSite.family.name]:
                    # add the nowCommons template.
                    pywikibot.output(u'Adding nowCommons template to %s' % sourceImagePage.title())
                    sourceImagePage.put(sourceImagePage.get() + '\n\n' + nowCommonsTemplate[sourceSite.lang] % targetFilename, comment = nowCommonsMessage[sourceSite.lang])
Example #33
0
 def fixHtml(self, text):
     """Replace simple HTML markup by the equivalent wiki syntax.

     Handles bold/italic/emphasis tags, horizontal rules and headers that
     sit alone on their line.  Everything is matched case-insensitively
     (MediaWiki itself converts <br> to <br />).  Content of nowiki,
     comments, math, pre, source and leading-space blocks is skipped.
     """
     skip = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
     # bold / italic tags become wiki quote markup
     for pattern, repl in ((r'(?i)<b>(.*?)</b>', r"'''\1'''"),
                           (r'(?i)<strong>(.*?)</strong>', r"'''\1'''"),
                           (r'(?i)<i>(.*?)</i>', r"''\1''"),
                           (r'(?i)<em>(.*?)</em>', r"''\1''")):
         text = pywikibot.replaceExcept(text, pattern, repl, skip)
     # horizontal line without attributes in a single line
     text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
                                    r'\1----\2', skip)
     # a horizontal line with attributes has no wiki equivalent, so only
     # make it XHTML compliant
     text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>',
                                    r'<hr \1 />', skip)
     # a header where only spaces are in the same line
     for level in range(1, 7):
         bar = '=' * level
         text = pywikibot.replaceExcept(
             text,
             r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'
             % (level, level),
             '\\1%s \\2 %s\\3' % (bar, bar),
             skip)
     # TODO: maybe we can make the bot replace <p> tags with \r\n's.
     return text
Example #34
0
 def fixArabicLetters(self, text):
     """Normalize punctuation and Arabic-script letters for ckb/fa wikis.

     Replaces the Latin comma with the Arabic comma and letter variants
     with the locally preferred forms; the digit-replacement stage below
     the return is intentionally disabled (see bug #3539407).
     Returns the (possibly) modified text.
     """
     # markup contexts whose content must never be altered
     exceptions = [
         'gallery',
         'hyperlink',
         'interwiki',
         # but changes letters inside wikilinks
         #'link',
         'math',
         'pre',
         'template',
         'timeline',
         'ref',
         'source',
         'startspace',
         'inputbox',
     ]
     # valid digits
     digits = {
         'ckb': u'٠١٢٣٤٥٦٧٨٩',
         'fa': u'۰۱۲۳۴۵۶۷۸۹',
     }
     faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
     new = digits.pop(self.site.lang)
     # This only works if there are only two items in digits dict
     old = digits[digits.keys()[0]]
     # do not change inside file links
     namespaces = list(self.site.namespace(6, all=True))
     pattern = re.compile(
         u'\[\[(' + '|'.join(namespaces) +
         '):.+?\.\w+? *(\|((\[\[.*?\]\])|.)*)?\]\]',
         re.UNICODE)
     #not to let bot edits in latin content
     exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
                                  % {'fa': faChrs}))
     exceptions.append(pattern)
     # Latin comma -> Arabic comma
     text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
     if self.site.lang == 'ckb':
         # heh before punctuation/whitespace -> ae; heh+ZWNJ -> ae;
         # remaining heh -> heh doachashmee
         text = pywikibot.replaceExcept(text,
                                        ur'ه([.،_<\]\s])',
                                        ur'ە\1', exceptions)
         text = pywikibot.replaceExcept(text, u'ه‌', u'ە', exceptions)
         text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
     # Arabic kaf -> keheh; yeh variants -> Farsi yeh
     text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
     text = pywikibot.replaceExcept(text, u'[ىي]', u'ی', exceptions)
     return text
     # NOTE: everything below this return is unreachable on purpose.
     # replace persian/arabic digits
     ## deactivated due to bug #3539407
     for i in xrange(0, 10):
         text = pywikibot.replaceExcept(text, old[i], new[i], exceptions)
     # do not change digits in class, style and table params
     pattern = re.compile(u'\w+=(".+?"|\d+)', re.UNICODE)
     exceptions.append(pattern)
     # do not change digits inside html-tags
     pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
     exceptions.append(pattern)
     exceptions.append('table')  # exclude tables for now
     # replace digits
     for i in xrange(0, 10):
         text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)
     return text
def categoriesChecked(category):
    """Mark *category*'s {{UncategorizedHeader}} as gallery-checked.

    If the page exists, adds a ``galleries=~~~~`` signature parameter to
    its UncategorizedHeader template, shows the diff and saves the page.
    """
    page = wikipedia.Page(wikipedia.getSite(), category)
    if not page.exists():
        return
    pattern = u'\{\{UncategorizedHeader([^\}]*)\}\}'
    replacement = u'{{UncategorizedHeader\\1|galleries=~~~~}}'
    newtext = wikipedia.replaceExcept(page.get(), pattern, replacement, [])
    wikipedia.showDiff(page.get(), newtext)
    page.put(newtext, u'No more images in galleries')
def categoriesChecked(category):
    """Mark *category*'s {{UncategorizedHeader}} as gallery-checked.

    Rewrites the page's UncategorizedHeader template to carry a
    ``galleries=~~~~`` signature, shows the diff and saves the result.

    NOTE(review): the original body mixed tab and space indentation,
    which is fragile in Python 2 and a hard error in Python 3; it is
    normalized to four-space indents here with no behavior change.
    """
    page = wikipedia.Page(wikipedia.getSite(), category)
    if (page.exists()):
        old = u'\{\{UncategorizedHeader([^\}]*)\}\}'
        new = u'{{UncategorizedHeader\\1|galleries=~~~~}}'
        newtext = wikipedia.replaceExcept(page.get(), old, new, [])
        comment = u'No more images in galleries'
        wikipedia.showDiff(page.get(), newtext)
        page.put(newtext, comment)
Example #37
0
 def fixStyle(self, text):
     """Convert the deprecated ``prettytable`` CSS class to ``wikitable``.

     Only applied on the German and English wikis; text inside the listed
     markup contexts is left untouched.
     """
     exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                   'startspace']
     # convert prettytable to wikitable class
     # bugfix: the rest of this file reads the language via
     # ``self.site.lang``; ``self.site.language`` is a method in compat
     # pywikibot, so comparing it (unbound) to strings was always False
     # and the replacement never ran.
     if self.site.lang in ('de', 'en'):
         text = pywikibot.replaceExcept(text,
                                        r'(class="[^"]*)prettytable([^"]*")',
                                        r'\1wikitable\2', exceptions)
     return text
Example #38
0
 def fixArabicLetters(self, text):
     """Localize punctuation, Arabic-script letters and digits for ckb/fa.

     Replaces the Latin comma with the Arabic comma, normalizes letter
     variants to the locally preferred forms, swaps digits between the
     Arabic-Indic and Persian sets, and converts Western digits to the
     local set.  Content inside the listed markup contexts is skipped.
     Returns the (possibly) modified text.
     """
     # markup contexts whose content must never be altered
     exceptions = [
         'gallery',
         'hyperlink',
         'interwiki',
         # but changes letters inside wikilinks
         #'link',
         'math',
         'pre',
         'template',
         'timeline',
         'ref',
         'source',
         'startspace',
         'inputbox',
     ]
     # valid digits
     digits = {
         'ckb' : u'٠١٢٣٤٥٦٧٨٩',
         'fa'  : u'۰۱۲۳۴۵۶۷۸۹'
     }
     new = digits.pop(self.site.lang)
     # This only works if there are only two items in digits dict
     old = digits[digits.keys()[0]]
     # do not change inside file links
     namespaces = list(self.site.namespace(6, all = True))
     pattern = re.compile(u'\[\[(' + '|'.join(namespaces) + '):.+?\..+?\]\]',
                          re.UNICODE)
     exceptions.append(pattern)
     # Latin comma -> Arabic comma
     text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
     if self.site.lang=='ckb':
         # heh before punctuation/whitespace -> ae; heh+ZWNJ -> ae;
         # remaining heh -> heh doachashmee
         text = pywikibot.replaceExcept(text,
                                        ur'ه([.،_<\]\s])',
                                        ur'ە\1', exceptions)
         text = pywikibot.replaceExcept(text, u'ه‌', u'ە', exceptions)
         text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
     # Arabic kaf -> keheh; yeh variants -> Farsi yeh
     text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
     text = pywikibot.replaceExcept(text, ur'[ىي]', u'ی', exceptions)
     # replace persian digits
     for i in range(0,10):
         text = pywikibot.replaceExcept(text, old[i], new[i], exceptions)
     # do not change digits in class, style and table params
     pattern = re.compile(u'\w+=(".+?"|\d+)', re.UNICODE)
     exceptions.append(pattern)
     # do not change digits inside html-tags
     pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
     exceptions.append(pattern)
     exceptions.append('table') #exclude tables for now
     # Western (ASCII) digits -> local digit set
     for i in range(0,10):
         text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)
     return text
Example #39
0
    def putSpacesInLists(self, text):
        """
        For better readability of bullet list and enumeration wiki source code,
        puts a space between the * or # and the text.

        NOTE: This space is recommended in the syntax help on the English, German,
        and French Wikipedia. It might be that it is not wanted on other wikis.
        If there are any complaints, please file a bug report.
        """
        # FIXME: This breaks redirects.
        # The replacement template is now a raw string: '\g' is not a
        # valid string escape, so the unprefixed literal only worked by
        # accident (and warns on newer Pythons).
        text = wikipedia.replaceExcept(
            text,
            r'(?m)^(?P<bullet>(\*+|#+):*)(?P<char>[^\s\*#:].+?)',
            r'\g<bullet> \g<char>',
            ['comment', 'math', 'nowiki', 'pre'])
        return text
Example #40
0
    def putSpacesInLists(self, text):
        """
        For better readability of bullet list and enumeration wiki source code,
        puts a space between the * or # and the text.

        NOTE: This space is recommended in the syntax help on the English, German,
        and French Wikipedia. It might be that it is not wanted on other wikis.
        If there are any complaints, please file a bug report.
        """
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'timeline']
        # Skip redirect fixing and runs from capitalize_redirects.
        # The deprecated ``<>`` operator is replaced by ``!=`` (equivalent
        # in Python 2, required for Python 3); the replacement template is
        # now a raw string so ``\g`` is a literal, not an invalid escape.
        if not self.redirect and pywikibot.calledModuleName() != 'capitalize_redirects':
            text = pywikibot.replaceExcept(
                text,
                r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
                r'\g<bullet> \g<char>',
                exceptions)
        return text
Example #41
0
    def transferImage(self, sourceImagePage, debug=False):
        """Gets a wikilink to an image, downloads it and its description,
           and uploads it to another wikipedia.
           Returns the filename which was used to upload the image
           This function is used by imagetransfer.py and by copy_table.py
        """
        sourceSite = sourceImagePage.site()
        if debug: print "-" * 50
        if debug: print "Found image: %s"% imageTitle
        url = sourceImagePage.fileUrl().encode('utf-8')
        pywikibot.output(u"URL should be: %s" % url)
        # localize the text that should be printed on the image description page
        try:
            description = sourceImagePage.get()
            # try to translate license templates
            if (sourceSite.sitename(), self.targetSite.sitename()) in licenseTemplates:
                for old, new in licenseTemplates[(sourceSite.sitename(), self.targetSite.sitename())].iteritems():
                    new = '{{%s}}' % new
                    old = re.compile('{{%s}}' % old)
                    description = pywikibot.replaceExcept(description, old, new,
                                                          ['comment', 'math',
                                                           'nowiki', 'pre'])

            description = pywikibot.translate(self.targetSite, copy_message) \
                          % (sourceSite, description)
            description += '\n\n' + sourceImagePage.getFileVersionHistoryTable()
            # add interwiki link
            if sourceSite.family == self.targetSite.family:
                description += "\r\n\r\n" + sourceImagePage.aslink(forceInterwiki = True)
        except pywikibot.NoPage:
            description=''
            print "Image does not exist or description page is empty."
        except pywikibot.IsRedirectPage:
            description=''
            print "Image description page is redirect."
        else:
            bot = upload.UploadRobot(url = url, description = description, targetSite = self.targetSite, urlEncoding = sourceSite.encoding())
            # try to upload
            targetFilename = bot.run()
            if targetFilename and self.targetSite.family.name == 'commons' and self.targetSite.lang == 'commons':
                # upload to Commons was successful
                reason = pywikibot.translate(sourceSite, nowCommonsMessage)
                # try to delete the original image if we have a sysop account
                if sourceSite.family.name in config.sysopnames and sourceSite.lang in config.sysopnames[sourceSite.family.name]:
                    if sourceImagePage.delete(reason):
                        return
                if sourceSite.lang in nowCommonsTemplate and sourceSite.family.name in config.usernames and sourceSite.lang in config.usernames[sourceSite.family.name]:
                    # add the nowCommons template.
                    pywikibot.output(u'Adding nowCommons template to %s' % sourceImagePage.title())
                    sourceImagePage.put(sourceImagePage.get() + '\n\n' + nowCommonsTemplate[sourceSite.lang] % targetFilename, comment = nowCommonsMessage[sourceSite.lang])
Example #42
0
    def cleanUpSectionHeaders(self, text):
        """
        For better readability of section header source code, puts a space
        between the equal signs and the title.
        Example: ==Section title== becomes == Section title ==

        NOTE: This space is recommended in the syntax help on the English and
        German Wikipedia. It might be that it is not wanted on other wikis.
        If there are any complaints, please file a bug report.
        """
        # One pass per header level; the ``\g<title>`` fragment is now a
        # raw string because ``\g`` is not a valid string escape.
        for level in range(1, 7):
            equals = '=' * level
            text = pywikibot.replaceExcept(
                text,
                r'\n' + equals + r' *(?P<title>[^=]+?) *' + equals + ' *\r\n',
                '\n' + equals + r' \g<title> ' + equals + '\r\n',
                ['comment', 'math', 'nowiki', 'pre'])
        return text
Example #43
0
    def transferImage(self, sourceImagePage, debug=False):
        """Gets a wikilink to an image, downloads it and its description,
           and uploads it to another wikipedia.
           Returns the filename which was used to upload the image
           This function is used by imagetransfer.py and by copy_table.py
        """
        sourceSite = sourceImagePage.site()
        if debug: print "-" * 50
        if debug: print "Found image: %s"% imageTitle
        url = sourceImagePage.fileUrl().encode('utf-8')
        pywikibot.output(u"URL should be: %s" % url)
        # localize the text that should be printed on the image description page
        try:
            description = sourceImagePage.get()
            # try to translate license templates
            if (sourceSite.sitename(), self.targetSite.sitename()) in licenseTemplates:
                for old, new in licenseTemplates[(sourceSite.sitename(), self.targetSite.sitename())].iteritems():
                    new = '{{%s}}' % new
                    old = re.compile('{{%s}}' % old)
                    description = pywikibot.replaceExcept(description, old, new,
                                                          ['comment', 'math',
                                                           'nowiki', 'pre'])

            description = pywikibot.translate(self.targetSite, copy_message) \
                          % (sourceSite, description)
            description += '\n\n' + sourceImagePage.getFileVersionHistoryTable()
            # add interwiki link
            if sourceSite.family == self.targetSite.family:
                description += "\r\n\r\n" + sourceImagePage.aslink(forceInterwiki = True)
        except pywikibot.NoPage:
            description=''
            print "Image does not exist or description page is empty."
        except pywikibot.IsRedirectPage:
            description=''
            print "Image description page is redirect."
        else:
            bot = upload.UploadRobot(url = url, description = description, targetSite = self.targetSite, urlEncoding = sourceSite.encoding())
            # try to upload
            targetFilename = bot.run()
            if targetFilename and self.targetSite.family.name == 'commons' and self.targetSite.lang == 'commons':
                # upload to Commons was successful
                reason = pywikibot.translate(sourceSite, nowCommonsMessage)
                # try to delete the original image if we have a sysop account
                if sourceSite.family.name in config.sysopnames and sourceSite.lang in config.sysopnames[sourceSite.family.name]:
                    if sourceImagePage.delete(reason):
                        return
                if sourceSite.lang in nowCommonsTemplate and sourceSite.family.name in config.usernames and sourceSite.lang in config.usernames[sourceSite.family.name]:
                    # add the nowCommons template.
                    pywikibot.output(u'Adding nowCommons template to %s' % sourceImagePage.title())
                    sourceImagePage.put(sourceImagePage.get() + '\n\n' + nowCommonsTemplate[sourceSite.lang] % targetFilename, comment = nowCommonsMessage[sourceSite.lang])
Example #44
0
 def replaceDeprecatedTemplates(self, text):
     """Substitute deprecated templates by their current replacements.

     Uses the module-level ``deprecatedTemplates`` mapping for this site's
     family and language.  A ``None`` replacement removes the template
     entirely; otherwise the old {{...}} invocation (with any parameters,
     and an optional msg: prefix) is rewritten to the new template name.
     """
     exceptions = ['comment', 'math', 'nowiki', 'pre']
     if self.site.family.name in deprecatedTemplates and self.site.lang in deprecatedTemplates[self.site.family.name]:
         for template in deprecatedTemplates[self.site.family.name][self.site.lang]:
             old = template[0]
             new = template[1]
             # idiom fix: identity test against None instead of ``==``
             if new is None:
                 new = ''
             else:
                 new = '{{' + new + '}}'
             if not self.site.nocapitalize:
                 # match either capitalization of the first letter
                 old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:]
             text = pywikibot.replaceExcept(text, r'\{\{([mM][sS][gG]:)?' + old + '(?P<parameters>\|[^}]+|)}}', new, exceptions)
     return text
Example #45
0
 def fixArabicLetters(self, text):
     """Localize punctuation, Arabic-script letters and digits for ckb/fa.

     Replaces the Latin comma with the Arabic comma, normalizes letter
     variants to the locally preferred forms, swaps digits between the
     Arabic-Indic and Persian sets, fixes fa wikilink pipes/trailing
     characters, and converts Western digits to the local set.
     Returns the (possibly) modified text.
     """
     # markup contexts whose content must never be altered
     exceptions = [
         'gallery',
         'hyperlink',
         'interwiki',
         # but changes letters inside wikilinks
         #'link',
         'math',
         'pre',
         'template',
         'timeline',
         'ref',
         'source',
         'startspace',
         'inputbox',
     ]
     # valid digits
     digits = {
         'ckb' : u'٠١٢٣٤٥٦٧٨٩',
         'fa'  : u'۰۱۲۳۴۵۶۷۸۹'
     }
     new = digits.pop(self.site.lang)
     # This only works if there are only two items in digits dict
     old = digits[digits.keys()[0]]
     # do not change inside file links
     namespaces = list(self.site.namespace(6, all = True))
     pattern = re.compile(u'\[\[(' + '|'.join(namespaces) + '):.+?\..+?\]\]',
                          re.UNICODE)
     exceptions.append(pattern)
     # Latin comma -> Arabic comma
     text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
     if self.site.lang=='ckb':
         # heh before punctuation/whitespace -> ae; heh+ZWNJ -> ae;
         # remaining heh -> heh doachashmee
         text = pywikibot.replaceExcept(text,
                                        ur'ه([.،_<\]\s])',
                                        ur'ە\1', exceptions)
         text = pywikibot.replaceExcept(text, u'ه‌', u'ە', exceptions)
         text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
     # Arabic kaf -> keheh; yeh variants -> Farsi yeh
     text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
     text = pywikibot.replaceExcept(text, ur'[ىي]', u'ی', exceptions)
     # replace persian digits
     for i in range(0,10):
         text = pywikibot.replaceExcept(text, old[i], new[i], exceptions)
     # do not change digits in class, style and table params
     pattern = re.compile(u'\w+=(".+?"|\d+)', re.UNICODE)
     exceptions.append(pattern)
     # do not change digits inside html-tags
     pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
     exceptions.append(pattern)
     exceptions.append('table') #exclude tables for now
     ##fixing pipe and trailing for fa. Thanks ZxxZxxZ
     if self.site.lang=='fa':
         faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + u'ًٌٍَُِّْٓٔ'
         text = re.sub(u'\[\[([^\]\|]*)]]([‌%s]+)' % faChrs, ur'[[\1|\1\2]]', text)
         text = re.sub(u'\[\[([^\]\|]*)\|(.+?)]]([‌%s]+)' % faChrs, ur'[[\1|\2\3]]', text)
     # Western (ASCII) digits -> local digit set
     for i in range(0,10):
         text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)
     return text
Example #46
0
def sort_TOL_subcat(parent, child):
    """Sort a tree-of-life subcategory under its parent category.

    Rewrites the child page's ``[[Category:<parent>...]]`` link so that it
    carries a sort key: the part of the child's title left over after the
    parent's title is removed (e.g. parent "Aves", child "Aves stubs"
    -> sort key "stubs").  The child page is saved with the new text.

    @param parent: parent category page
    @param child: child category page whose wikitext is rewritten
    """
    # Sort key: child title minus the parent title, without leading blanks.
    suffix = child.titleWithoutNamespace().replace(
        parent.titleWithoutNamespace(), u'').lstrip()
    wikipedia.output(parent.titleWithoutNamespace())
    wikipedia.output(suffix)
    # Replace [[Category:<parent>...]] with [[Category:<parent>|<suffix>]].
    # The title is interpolated into a regex, so escape it: titles may
    # contain metacharacters such as parentheses or dots.
    old = (u'\[\[[cC]ategory:' + re.escape(parent.titleWithoutNamespace()) +
           u'[^\]]*\]\]')
    new = u'[[Category:' + parent.titleWithoutNamespace(
    ) + u'|' + suffix + u']]'
    #newgal = u'[[' + child.title() + u'| ]]'
    newtext = wikipedia.replaceExcept(child.get(), old, new, [])
    comment = u'Sorting category'
    #commentgal = u'Moving to category with the same name'
    wikipedia.showDiff(child.get(), newtext)
    child.put(newtext, comment)
    '''
Example #47
0
 def doReplacements(self, original_text):
     """Apply every configured replacement to *original_text*.

     Returns the resulting text; the input string is not modified.
     Optionally sleeps between individual replacements (self.sleep).
     """
     # Collect the exception patterns that protect parts of the text.
     skip_regions = []
     for key in ('inside-tags', 'inside'):
         if key in self.exceptions:
             skip_regions += self.exceptions[key]
     result = original_text
     for pattern, replacement in self.replacements:
         # Optional throttle between individual replacements.
         if self.sleep is not None:
             time.sleep(self.sleep)
         result = pywikibot.replaceExcept(result, pattern, replacement,
                                          skip_regions,
                                          allowoverlap=self.allowoverlap)
     return result
Example #48
0
    def putSpacesInLists(self, text):
        """
        For better readability of bullet list and enumeration wiki source
        code, puts a space between the * or # and the text.

        NOTE: This space is recommended in the syntax help on the English,
        German, and French Wikipedia. It might be that it is not wanted on
        other wikis. If there are any complaints, please file a bug report.
        """
        # Skip regions where list markup has no meaning.
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'timeline']
        # Do not touch redirect pages, and skip when running from
        # capitalize_redirects.  The deprecated Python-2-only '<>' operator
        # was replaced by '!=' (identical semantics, valid in Python 3).
        if not self.redirect and pywikibot.calledModuleName() != 'capitalize_redirects':
            text = pywikibot.replaceExcept(
                text,
                r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
                '\g<bullet> \g<char>', exceptions)
        return text
Example #49
0
    def translateAndCapitalizeNamespaces(self, text):
        """Replace namespace prefixes in wiki links by the localized,
        canonically capitalized namespace name.
        """
        # arz uses english stylish codes
        if self.site.sitename() == 'wikipedia:arz':
            return text
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for ns_id in family.namespaces:
            # Skip namespaces this family does not define for the language.
            if not family.isDefinedNSLanguage(ns_id, self.site.lang):
                continue
            # Skip user/user-talk namespaces: gendered forms may be in use.
            if ns_id in (2, 3):
                continue
            aliases = list(self.site.namespace(ns_id, all=True))
            canonical = aliases.pop(0)
            if ns_id == 6 and family.name == 'wikipedia':
                if self.site.lang in ('en', 'fr'):
                    # do not change "Image" on en-wiki and fr-wiki
                    for alias in (u'Image', u'image'):
                        if alias in aliases:
                            aliases.remove(alias)
                if self.site.lang == 'hu':
                    # do not change "Kép" on hu-wiki
                    for alias in (u'Kép', u'kép'):
                        if alias in aliases:
                            aliases.remove(alias)
                elif self.site.lang == 'pt':
                    # bug #3346901 should be implemented
                    continue
            # Nothing to do for the main (article) namespace, or when no
            # alternative alias remains.
            if not (canonical and aliases):
                continue
            text = pywikibot.replaceExcept(
                text,
                r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                % '|'.join(aliases),
                r'[[%s:\g<nameAndLabel>]]' % canonical,
                exceptions)
        return text
Example #50
0
 def __iter__(self):
     """Iterate over XML dump entries and yield a pywikibot.Page for each
     page whose text would be changed by at least one replacement.

     Page text is not modified here; the replacements are applied only to
     decide whether to yield the entry.
     """
     try:
         for entry in self.parser:
             # When resuming with -xmlstart, skip entries until the
             # requested start title is reached.
             if self.skipping:
                 if entry.title != self.xmlStart:
                     continue
                 self.skipping = False
             if not self.isTitleExcepted(entry.title) \
                     and not self.isTextExcepted(entry.text):
                 new_text = entry.text
                 for old, new in self.replacements:
                     # NOTE(review): self.site is passed as the 5th
                     # positional argument of replaceExcept — verify it
                     # matches the expected parameter in this signature.
                     new_text = pywikibot.replaceExcept(new_text, old, new, self.excsInside, self.site)
                 if new_text != entry.text:
                     yield pywikibot.Page(self.site, entry.title)
     except KeyboardInterrupt:
         try:
             # 'entry' is bound only if iteration has started; otherwise
             # referencing it raises NameError, which is ignored below.
             if not self.skipping:
                 pywikibot.output(
                     u'To resume, use "-xmlstart:%s" on the command line.'
                     % entry.title)
         except NameError:
             pass
Example #51
0
 def validXhtml(self, text):
     """Normalize every <br> variant to the XHTML self-closing '<br />'."""
     skip = ['comment', 'math', 'nowiki', 'pre']
     return pywikibot.replaceExcept(text, r'(?i)<br[ /]*>', r'<br />', skip)
Example #52
0
    def cleanUpLinks(self, text):
        """Tidy [[wiki links]] in namespace 0: normalize underscores and
        repeated spaces in titles, decode URL-encoded characters, drop
        redundant pipes, and move spurious spaces outside of the link.

        Only article-namespace links are touched because pipes and
        linktrails behave differently for images and categories.

        @param text: wikitext to clean up
        @return: the cleaned-up wikitext
        """
        # helper function which works on one link and either returns it
        # unmodified, or returns a replacement.
        def handleOneLink(match):
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')
            newline = match.group('newline')

            if not self.site.isInterwikiLink(titleWithSection):
                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                # We only work on namespace 0 because pipes and linktrails work
                # differently for images and categories.
                try:
                    page = pywikibot.Page(self.site, titleWithSection)
                #except pywikibot.InvalidTitle:
                except Exception:
                    # Empty self link occurs.  Was a bare 'except:', which
                    # also swallowed KeyboardInterrupt and SystemExit.
                    return match.group()
                if page.namespace() == 0:
                    # Replace underlines by spaces, also multiple underlines
                    titleWithSection = re.sub('_+', ' ', titleWithSection)
                    # Remove double spaces
                    titleWithSection = re.sub('  +', ' ', titleWithSection)
                    # Remove unnecessary leading spaces from title,
                    # but remember if we did this because we eventually want
                    # to re-add it outside of the link later.
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.lstrip()
                    hadLeadingSpaces = (len(titleWithSection) != titleLength)
                    hadTrailingSpaces = False
                    # Remove unnecessary trailing spaces from title,
                    # but remember if we did this because it may affect
                    # the linktrail and because we eventually want to
                    # re-add it outside of the link later.
                    if not trailingChars:
                        titleLength = len(titleWithSection)
                        titleWithSection = titleWithSection.rstrip()
                        hadTrailingSpaces = (len(titleWithSection) !=
                                             titleLength)

                    # Convert URL-encoded characters to unicode
                    titleWithSection = pywikibot.url2unicode(titleWithSection,
                                                             site=self.site)

                    if titleWithSection == '':
                        # just skip empty links.
                        return match.group()

                    # Remove unnecessary initial and final spaces from label.
                    # Please note that some editors prefer spaces around
                    # pipes (see [[en:Wikipedia:Semi-bots]]). We remove them
                    # anyway.
                    if label is not None:
                        # Remove unnecessary leading spaces from label,
                        # but remember if we did this because we want
                        # to re-add it outside of the link later.
                        labelLength = len(label)
                        label = label.lstrip()
                        # NOTE(review): this overwrites the flag computed
                        # from the title above; kept as-is to preserve the
                        # existing behaviour.
                        hadLeadingSpaces = (len(label) != labelLength)
                        # Remove unnecessary trailing spaces from label,
                        # but remember if we did this because it affects
                        # the linktrail.
                        if not trailingChars:
                            labelLength = len(label)
                            label = label.rstrip()
                            hadTrailingSpaces = (len(label) != labelLength)
                    else:
                        label = titleWithSection
                    if trailingChars:
                        label += trailingChars

                    # Plain [[link]] when title and label agree (up to the
                    # case of the first letter).
                    if titleWithSection == label or titleWithSection[0].lower(
                    ) + titleWithSection[1:] == label:
                        newLink = "[[%s]]" % label
                    # Check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif len(titleWithSection) <= len(label) and label[:len(
                            titleWithSection)] == titleWithSection and re.sub(
                                trailR, '',
                                label[len(titleWithSection):]) == '':
                        newLink = "[[%s]]%s" % (label[:len(titleWithSection)],
                                                label[len(titleWithSection):])
                    else:
                        # Try to capitalize the first letter of the title.
                        # Maybe this feature is not useful for languages that
                        # don't capitalize nouns...
                        #if not self.site.nocapitalize:
                        if self.site.sitename() == 'wikipedia:de':
                            titleWithSection = titleWithSection[0].upper(
                            ) + titleWithSection[1:]
                        newLink = "[[%s|%s]]" % (titleWithSection, label)
                    # re-add spaces that were pulled out of the link.
                    # Examples:
                    #   text[[ title ]]text        -> text [[title]] text
                    #   text[[ title | name ]]text -> text [[title|name]] text
                    #   text[[ title |name]]text   -> text[[title|name]]text
                    #   text[[title| name]]text    -> text [[title|name]]text
                    if hadLeadingSpaces and not newline:
                        newLink = ' ' + newLink
                    if hadTrailingSpaces:
                        newLink = newLink + ' '
                    if newline:
                        newLink = newline + newLink
                    return newLink
            # don't change anything
            return match.group()

        trailR = re.compile(self.site.linktrail())
        # The regular expression which finds links. Results consist of four
        # groups:
        # group title is the target page title, that is, everything before
        #   | or ].
        # group section is the page section. It'll include the # to make
        #   life easier for us.
        # group label is the alternative link title, that's everything
        #   between | and ].
        # group linktrail is the link trail, that's letters after ]] which
        #   are part of the word.
        # note that the definition of 'letter' varies from language to
        # language.
        linkR = re.compile(
            r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>'
            + self.site.linktrail() + ')')

        text = pywikibot.replaceExcept(
            text, linkR, handleOneLink,
            ['comment', 'math', 'nowiki', 'pre', 'startspace'])
        return text