Example #1
    def replace(self, match):
        template_name = match.group('template').replace('_', ' ').strip()
        if template_name.startswith(tuple(self.defaultsort)):
            return match.group()

        template_name_norm = first_upper(template_name).partition('<!--')[0]
        if template_name_norm not in self.cache:
            template = pywikibot.Page(self.site, template_name_norm, ns=10)
            try:
                do_replace = template.exists() and template.isRedirectPage()
            except pywikibot.exceptions.InvalidTitle:
                do_replace = False
            except pywikibot.exceptions.InconsistentTitleReceived:
                do_replace = False
            if do_replace:
                target = template.getRedirectTarget()
                self.cache[template_name_norm] = target.title(with_ns=False)
            else:
                self.cache[template_name_norm] = None

        target = self.cache[template_name_norm]
        if not target:
            return match.group()

        if template_name != first_upper(template_name):
            if all(part.islower() for part in target.partition(' ')[0][1:]
                   if part.isalpha()):
                target = first_lower(target)

        return match.group('before') + target + match.group('after')
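Every example in this listing revolves around pywikibot.tools.first_upper and first_lower, which change only the first character of a string (on most MediaWiki wikis, titles are case-insensitive in the first letter only). A minimal sketch of equivalent helpers, assuming the simple case and ignoring pywikibot's special handling of characters whose uppercase form expands to more than one character:

def first_upper(string: str) -> str:
    """Return the string with only its first character uppercased."""
    return string[:1].upper() + string[1:]


def first_lower(string: str) -> str:
    """Return the string with only its first character lowercased."""
    return string[:1].lower() + string[1:]


# Unlike str.title(), only the first character is touched.
assert first_upper('foo bar') == 'Foo bar'
assert first_lower('Foo Bar') == 'foo Bar'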
Example #2
def main():
    """Print environment variables."""
    _pwb_dir = os.path.abspath(os.path.join(
        os.path.split(__file__)[0], '..', '..'))
    _pwb_dir = first_upper(_pwb_dir)

    print('os.environ:')
    for k, v in sorted(os.environ.items()):
        # Don't leak the password into logs
        if k == 'USER_PASSWORD':
            continue
        # This only appears in subprocesses
        if k == 'PYWIKIBOT_DIR_PWB':
            continue
        print('{}: {}'.format(k, v))

    print('sys.path:')
    for path in sys.path:
        if path == '' or path.startswith('.'):
            continue
        # Normalise DOS drive letter
        path = first_upper(path)
        if path.startswith(_pwb_dir):
            continue
        print(path)
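In this script first_upper has nothing to do with wiki titles: it only normalises the Windows drive letter so that the startswith comparison against _pwb_dir is case-stable. A small illustration, assuming pywikibot is installed:

from pywikibot.tools import first_upper

# 'c:\\pywikibot' and 'C:\\pywikibot' name the same directory on Windows,
# but a plain string comparison would treat them as different prefixes.
assert first_upper('c:\\pywikibot\\scripts') == 'C:\\pywikibot\\scripts'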
Example #3
    def ReplaceLink(self, text, oldtxt, newtxt):

        frmParts = [s.strip(self.stripChars) for s in self.wordBreaker.split(oldtxt)]
        toParts = [s.strip(self.stripChars) for s in self.wordBreaker.split(newtxt)]

        if len(frmParts) != len(toParts):
            raise ValueError("Splitting parts do not match counts")
        for i in xrange(0, len(frmParts)):
            if len(frmParts[i]) != len(toParts[i]):
                raise ValueError("Splitting parts do not match word length")
            if len(frmParts[i]) > 0:
                text = text.replace(first_lower(frmParts[i]), first_lower(toParts[i]))
                text = text.replace(first_upper(frmParts[i]), first_upper(toParts[i]))
        return text
Example #4
    def ReplaceLink(self, text, oldtxt, newtxt):
        """Replace links."""
        frmParts = [s.strip(self.stripChars)
                    for s in self.wordBreaker.split(oldtxt)]
        toParts = [s.strip(self.stripChars)
                   for s in self.wordBreaker.split(newtxt)]

        if len(frmParts) != len(toParts):
            raise ValueError('Splitting parts do not match counts')
        for i, part in enumerate(frmParts):
            if len(part) != len(toParts[i]):
                raise ValueError('Splitting parts do not match word length')
            if part:
                text = text.replace(first_lower(part), first_lower(toParts[i]))
                text = text.replace(first_upper(part), first_upper(toParts[i]))
        return text
Example #5
    def replace(self, match):
        text = match.group()
        code = self.parser.parse(text, skip_style_tags=True)
        sections = []
        for header in code.ifilter_headings():
            name = header.title.strip()
            if name in self.replace_headers:
                name = self.replace_headers[name]
            if name in self.iter_all_headers():
                sections.append({
                    'name': first_upper(name),
                    'nodes': [header],
                })
            else:
                sections[:] = []
        if not sections:
            return text

        do_more = False
        first_index = min(
            code.nodes.index(sect['nodes'][0]) for sect in sections)
        last_index = self.add_contents(sections, code)
        do_more = self.deduplicate(sections, code) or do_more
        do_more = self.check_levels(sections, code) or do_more
        if do_more:
            sections.sort(key=self.sortkey)
        self.reorganize(sections, code)
        self.clean_empty(sections, code, do_more)
        code.nodes[first_index:last_index] = [
            node for sect in sections for node in sect['nodes']
        ]
        return str(code)
Example #6
    def sametitle(self, title1: str, title2: str) -> bool:
        """
        Return True if title1 and title2 identify the same wiki page.

        title1 and title2 may be unequal but still identify the same page,
        if they use different aliases for the same namespace.
        """
        def ns_split(title):
            """Separate the namespace from the name."""
            ns, delim, name = title.partition(':')
            if delim:
                ns = self.namespaces.lookup_name(ns)
            if not delim or not ns:
                return default_ns, title
            return ns, name

        # Replace alias characters like underscores with title
        # delimiters like spaces and multiple combinations of them with
        # only one delimiter
        sep = self.family.title_delimiter_and_aliases[0]
        pattern = re.compile('[{}]+'.format(
            self.family.title_delimiter_and_aliases))
        title1 = pattern.sub(sep, title1)
        title2 = pattern.sub(sep, title2)
        if title1 == title2:
            return True

        default_ns = self.namespaces[0]
        # determine whether titles contain namespace prefixes
        ns1_obj, name1 = ns_split(title1)
        ns2_obj, name2 = ns_split(title2)
        if ns1_obj != ns2_obj:
            # pages in different namespaces
            return False

        name1 = name1.strip()
        name2 = name2.strip()
        # If the namespace has a case definition it's overriding the site's
        # case definition
        if ns1_obj.case == 'first-letter':
            name1 = first_upper(name1)
            name2 = first_upper(name2)
        return name1 == name2
Example #7
    def ReplaceLink(self, text, oldtxt, newtxt):

        frmParts = [
            s.strip(self.stripChars) for s in self.wordBreaker.split(oldtxt)
        ]
        toParts = [
            s.strip(self.stripChars) for s in self.wordBreaker.split(newtxt)
        ]

        if len(frmParts) != len(toParts):
            raise ValueError(u'Splitting parts do not match counts')
        for i in xrange(0, len(frmParts)):
            if len(frmParts[i]) != len(toParts[i]):
                raise ValueError(u'Splitting parts do not match word length')
            if len(frmParts[i]) > 0:
                text = text.replace(first_lower(frmParts[i]),
                                    first_lower(toParts[i]))
                text = text.replace(first_upper(frmParts[i]),
                                    first_upper(toParts[i]))
        return text
Example #8
    def normalize(self, template):
        # return self.parser.normalize(template)
        return first_upper(template
                           .partition('<!--')[0]
                           .replace('_', ' ')
                           .strip())
Example #9
        def handleOneLink(match):
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')
            newline = match.group('newline')

            try:
                is_interwiki = self.site.isInterwikiLink(titleWithSection)
            except ValueError:  # T111513
                is_interwiki = True

            if not is_interwiki:
                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                # We only work on namespace 0 because pipes and linktrails work
                # differently for images and categories.
                page = pywikibot.Page(
                    pywikibot.Link(titleWithSection, self.site))
                try:
                    namespace = page.namespace()
                except pywikibot.InvalidTitle:
                    return match.group()
                if namespace == 0:
                    # Replace underlines by spaces, also multiple underlines
                    titleWithSection = re.sub('_+', ' ', titleWithSection)
                    # Remove double spaces
                    titleWithSection = re.sub('  +', ' ', titleWithSection)
                    # Remove unnecessary leading spaces from title,
                    # but remember if we did this because we eventually want
                    # to re-add it outside of the link later.
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.lstrip()
                    hadLeadingSpaces = (len(titleWithSection) != titleLength)
                    hadTrailingSpaces = False
                    # Remove unnecessary trailing spaces from title,
                    # but remember if we did this because it may affect
                    # the linktrail and because we eventually want to
                    # re-add it outside of the link later.
                    if not trailingChars:
                        titleLength = len(titleWithSection)
                        titleWithSection = titleWithSection.rstrip()
                        hadTrailingSpaces = (len(titleWithSection) !=
                                             titleLength)

                    # Convert URL-encoded characters to unicode
                    from pywikibot.page import url2unicode
                    titleWithSection = url2unicode(titleWithSection,
                                                   encodings=self.site)

                    if titleWithSection == '':
                        # just skip empty links.
                        return match.group()

                    # Remove unnecessary initial and final spaces from label.
                    # Please note that some editors prefer spaces around pipes.
                    # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
                    if label is not None:
                        # Remove unnecessary leading spaces from label,
                        # but remember if we did this because we want
                        # to re-add it outside of the link later.
                        labelLength = len(label)
                        label = label.lstrip()
                        hadLeadingSpaces = (len(label) != labelLength)
                        # Remove unnecessary trailing spaces from label,
                        # but remember if we did this because it affects
                        # the linktrail.
                        if not trailingChars:
                            labelLength = len(label)
                            label = label.rstrip()
                            hadTrailingSpaces = (len(label) != labelLength)
                    else:
                        label = titleWithSection
                    if trailingChars:
                        label += trailingChars

                    if self.site.siteinfo['case'] == 'first-letter':
                        firstcase_title = first_lower(titleWithSection)
                        firstcase_label = first_lower(label)
                    else:
                        firstcase_title = titleWithSection
                        firstcase_label = label

                    if firstcase_label == firstcase_title:
                        newLink = '[[%s]]' % label
                    # Check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (firstcase_label.startswith(firstcase_title) and
                          trailR.sub('', label[len(titleWithSection):]) == ''):
                        newLink = '[[%s]]%s' % (label[:len(titleWithSection)],
                                                label[len(titleWithSection):])

                    else:
                        # Try to capitalize the first letter of the title.
                        # Not useful for languages that don't capitalize nouns.
                        # TODO: Add a configuration variable for each site,
                        # which determines if the link target is written in
                        # uppercase
                        if self.site.sitename == 'wikipedia:de':
                            titleWithSection = first_upper(titleWithSection)
                        newLink = "[[%s|%s]]" % (titleWithSection, label)
                    # re-add spaces that were pulled out of the link.
                    # Examples:
                    #   text[[ title ]]text        -> text [[title]] text
                    #   text[[ title | name ]]text -> text [[title|name]] text
                    #   text[[ title |name]]text   -> text[[title|name]]text
                    #   text[[title| name]]text    -> text [[title|name]]text
                    if hadLeadingSpaces and not newline:
                        newLink = ' ' + newLink
                    if hadTrailingSpaces:
                        newLink = newLink + ' '
                    if newline:
                        newLink = newline + newLink
                    return newLink
            # don't change anything
            return match.group()
Example #10
        def handleOneLink(match):
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')
            newline = match.group('newline')

            if not self.site.isInterwikiLink(titleWithSection):
                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                # We only work on namespace 0 because pipes and linktrails work
                # differently for images and categories.
                page = pywikibot.Page(pywikibot.Link(titleWithSection,
                                                     self.site))
                try:
                    namespace = page.namespace()
                except pywikibot.InvalidTitle:
                    return match.group()
                if namespace == 0:
                    # Replace underlines by spaces, also multiple underlines
                    titleWithSection = re.sub('_+', ' ', titleWithSection)
                    # Remove double spaces
                    titleWithSection = re.sub('  +', ' ', titleWithSection)
                    # Remove unnecessary leading spaces from title,
                    # but remember if we did this because we eventually want
                    # to re-add it outside of the link later.
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.lstrip()
                    hadLeadingSpaces = (len(titleWithSection) != titleLength)
                    hadTrailingSpaces = False
                    # Remove unnecessary trailing spaces from title,
                    # but remember if we did this because it may affect
                    # the linktrail and because we eventually want to
                    # re-add it outside of the link later.
                    if not trailingChars:
                        titleLength = len(titleWithSection)
                        titleWithSection = titleWithSection.rstrip()
                        hadTrailingSpaces = (len(titleWithSection) !=
                                             titleLength)

                    # Convert URL-encoded characters to unicode
                    from pywikibot.page import url2unicode
                    titleWithSection = url2unicode(titleWithSection,
                                                   encodings=self.site)

                    if titleWithSection == '':
                        # just skip empty links.
                        return match.group()

                    # Remove unnecessary initial and final spaces from label.
                    # Please note that some editors prefer spaces around pipes.
                    # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
                    if label is not None:
                        # Remove unnecessary leading spaces from label,
                        # but remember if we did this because we want
                        # to re-add it outside of the link later.
                        labelLength = len(label)
                        label = label.lstrip()
                        hadLeadingSpaces = (len(label) != labelLength)
                        # Remove unnecessary trailing spaces from label,
                        # but remember if we did this because it affects
                        # the linktrail.
                        if not trailingChars:
                            labelLength = len(label)
                            label = label.rstrip()
                            hadTrailingSpaces = (len(label) != labelLength)
                    else:
                        label = titleWithSection
                    if trailingChars:
                        label += trailingChars

                    if titleWithSection == label or \
                       first_lower(titleWithSection) == label:
                        newLink = "[[%s]]" % label
                    # Check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (len(titleWithSection) <= len(label) and
                          label[:len(titleWithSection)] == titleWithSection and
                          re.sub(trailR, '',
                                 label[len(titleWithSection):]) == ''):
                        newLink = "[[%s]]%s" % (label[:len(titleWithSection)],
                                                label[len(titleWithSection):])
                    else:
                        # Try to capitalize the first letter of the title.
                        # Not useful for languages that don't capitalize nouns.
                        # TODO: Add a configuration variable for each site,
                        # which determines if the link target is written in
                        # uppercase
                        if self.site.sitename == 'wikipedia:de':
                            titleWithSection = first_upper(titleWithSection)
                        newLink = "[[%s|%s]]" % (titleWithSection, label)
                    # re-add spaces that were pulled out of the link.
                    # Examples:
                    #   text[[ title ]]text        -> text [[title]] text
                    #   text[[ title | name ]]text -> text [[title|name]] text
                    #   text[[ title |name]]text   -> text[[title|name]]text
                    #   text[[title| name]]text    -> text [[title|name]]text
                    if hadLeadingSpaces and not newline:
                        newLink = ' ' + newLink
                    if hadTrailingSpaces:
                        newLink = newLink + ' '
                    if newline:
                        newLink = newline + newLink
                    return newLink
            # don't change anything
            return match.group()
Example #11
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Script that forms part of pwb_tests."""
from __future__ import absolute_import, unicode_literals

import os
import sys

from pywikibot.tools import first_upper

_pwb_dir = os.path.abspath(os.path.join(
    os.path.split(__file__)[0], '..', '..'))
_pwb_dir = first_upper(_pwb_dir)

print('os.environ:')
for k, v in sorted(os.environ.items()):
    # Don't leak the password into logs
    if k == 'USER_PASSWORD':
        continue
    # This only appears in subprocesses
    if k in ['PYWIKIBOT2_DIR_PWB']:
        continue
    print("{0!r}: {1!r}".format(k, v))

print('sys.path:')
for path in sys.path:
    if path == '' or path.startswith('.'):
        continue
    # Normalise DOS drive letter
    path = first_upper(path)
    if path.startswith(_pwb_dir):
        continue
    print(path)
Example #12
                    "Couldn't determine the item for values {}/{} ({} items)".
                    format(params[0], params[1], len(items)))
                continue
            item = items.pop()
            if params[2] != item.getID():  # 3rd param is index 2
                template.add(3, item.getID())
                change = True
            if index['název'] is not None:
                title_cell = cells[index['název']]
                nodes = title_cell.contents.nodes
                # fixme: ignore &nbsp;
                #wikilinks = title_cell.contents.filter_wikilinks()
                #if not wikilinks:
                if len(nodes) == 1:
                    match = titleR.fullmatch(str(nodes[0]))
                    link = item.sitelinks.get(page.site)
                    if link and match:
                        groups = match.groups()
                        if first_upper(groups[1]) == link.title:
                            new = '{}[[{}]]{}'.format(*groups)
                        else:
                            new = '{1}[[{0}|{2}]]{3}'.format(
                                link.title, *groups)
                        title_cell.contents.replace(nodes[0], new)
                        change = True

    if change:
        page.text = str(code)
        page.save(summary='doplnění článků a/nebo položek na Wikidatech',
                  asynchronous=True)
Example #13
    def normalize(self, template):
        # return self.parser.normalize(template)
        return first_upper(template
                           .partition('<!--')[0]
                           .replace('_', ' ')
                           .strip())
Example #14
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Script that forms part of pwb_tests."""
from __future__ import absolute_import, unicode_literals

import os
import sys
from pywikibot.tools import first_upper

_pwb_dir = os.path.abspath(os.path.join(
    os.path.split(__file__)[0], '..', '..'))
_pwb_dir = first_upper(_pwb_dir)

print('os.environ:')
for k, v in sorted(os.environ.items()):
    # Don't leak the password into logs
    if k == 'USER_PASSWORD':
        continue
    # This only appears in subprocesses
    if k in ['PYWIKIBOT2_DIR_PWB']:
        continue
    print("%r: %r" % (k, v))

print('sys.path:')
for path in sys.path:
    if path == '' or path.startswith('.'):
        continue
    # Normalise DOS drive letter
    path = first_upper(path)
    if path.startswith(_pwb_dir):
        continue
    print(path)