def test_extract_templates_params_regex(self):
        """Check the extractor that is built on many complex regexes.

        The shared checks run against a variant that keeps disabled parts;
        the remaining assertions pin behaviour specific to this extractor.
        """
        extract = textlib.extract_templates_and_params_regex
        keep_disabled = functools.partial(extract,
                                          remove_disabled_parts=False)
        self._common_results(keep_disabled)
        self._order_differs(keep_disabled)

        # FIXME: {} is normal text
        self.assertEqual(keep_disabled('{{a|b={} }}'), [])

        # Whitespace next to the parameter name or value is not expected
        # in the result.
        for wikitext in ('{{a| b=c}}', '{{a|b =c}}',
                         '{{a|b= c}}', '{{a|b=c }}'):
            self.assertEqual(keep_disabled(wikitext),
                             [('a', OrderedDict([('b', 'c')]))])

        # Called with its default arguments, the commented-out value is
        # expected to come back as an empty string.
        self.assertEqual(extract('{{a|b=<!--{{{1}}}-->}}'),
                         [('a', OrderedDict([('b', '')]))])

        # Identical to mwpfh
        self.assertCountEqual(
            extract('{{a|{{c|{{d}}}}}}'),
            [('c', OrderedDict([('1', '{{d}}')])),
             ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
             ('d', OrderedDict())])

        # However fails to correctly handle three levels of balanced brackets
        # with empty parameters
        self.assertCountEqual(
            extract('{{a|{{c|{{d|}}}}}}'),
            [('c', OrderedDict([('1', '{{d|}}}')])),
             ('d', OrderedDict([('1', '}')]))])
    def _etp_regex_differs(self, func):
        """Assert the cases that ETP_REGEX does not handle the same way.

        :param func: extractor under test; called with wikitext and
            expected to return a list of (name, OrderedDict) tuples
        """
        cases = (
            # whitespace around the name and the value is kept verbatim
            ('{{a| b=c}}', (' b', 'c')),
            ('{{a|b =c}}', ('b ', 'c')),
            ('{{a|b= c}}', ('b', ' c')),
            ('{{a|b=c }}', ('b', 'c ')),
            # inner {} should be treated as part of the value
            ('{{a|b={} }}', ('b', '{} ')),
        )
        for wikitext, pair in cases:
            self.assertEqual(func(wikitext),
                             [('a', OrderedDict([pair]))])
Example #3
0
    def treat_property_and_talk(self, prop, page):
        """Process the metadata template on a page and clean it up.

        The first template on ``page`` whose name matches
        ``self.template_metadata`` is located. Every parameter of it that
        has a handler in ``self.func_dict`` and a value other than ``''``
        or ``'-'`` is passed to that handler. Unless the ``importonly``
        option is set, parameters whose handler returned a true value are
        blanked, parameters listed in ``self.obsolete_params`` are removed
        and the page is saved.

        :param prop: not referenced by this method
        :param page: page whose text contains the metadata template
        """
        self.current_talk_page = page
        # todo: skip sandbox properties
        # todo: removeDisabledParts now?
        code = mwparserfromhell.parse(page.text, skip_style_tags=True)
        for template in code.ifilter_templates():
            if template.name.matches(self.template_metadata):
                break
        else:
            pywikibot.output('Template "{}" not found'.format(
                self.template_metadata))
            return

        params = OrderedDict(
            (str(param.name).strip(), str(param.value).strip())
            for param in template.params)

        # Follow the template's own parameter order so the handlers run in
        # a reproducible sequence; a set intersection has arbitrary order.
        keys = [key for key in params if key in self.func_dict]
        # formatter URL must go before example
        if 'formatter URL' in keys and 'example' in keys:
            keys.remove('formatter URL')
            keys.insert(0, 'formatter URL')

        clear_params = []
        for key in keys:
            param = textlib.removeDisabledParts(params[key])
            if param in ('', '-'):
                continue
            pywikibot.output('Found param "{}"'.format(key))
            try:
                remove = self.func_dict[key](param)
            except pywikibot.data.api.APIError as exc:
                # Best effort: leave the parameter on the page, but do not
                # hide the failure from the operator.
                pywikibot.warning(
                    'Processing param "{}" failed: {}'.format(key, exc))
                continue
            if remove:
                clear_params.append(key)
        if self.getOption('importonly'):
            return

        for par in clear_params:
            # keep_field=True blanks the value but keeps the parameter name
            template.remove(par, keep_field=True)
        obsolete = set(self.obsolete_params)
        for par in params:
            if par in obsolete:
                template.remove(par)

        self.current_page = self.current_talk_page
        self.put_current(str(code),
                         show_diff=True,
                         summary='removing migrated/obsolete parameters')
    def treat_property_and_talk(self, prop, page):
        """Process the metadata template on a page and clean it up.

        The first template on ``page`` whose name matches
        ``self.template_metadata`` is located. Every parameter of it that
        has a handler in ``self.func_dict`` and a value other than ``''``
        or ``'-'`` is passed to that handler. Unless the ``importonly``
        option is set, parameters whose handler returned a true value are
        blanked, parameters listed in ``self.obsolete_params`` are removed
        and the page is saved.

        :param prop: not referenced by this method
        :param page: page whose text contains the metadata template
        """
        self.current_talk_page = page
        # todo: skip sandbox properties
        # todo: removeDisabledParts now?
        code = mwparserfromhell.parse(page.text, skip_style_tags=True)
        for template in code.ifilter_templates():
            if template.name.matches(self.template_metadata):
                break
        else:
            pywikibot.output('Template "{}" not found'.format(
                self.template_metadata))
            return

        params = OrderedDict(
            (str(param.name).strip(), str(param.value).strip())
            for param in template.params)

        # Follow the template's own parameter order so the handlers run in
        # a reproducible sequence; a set intersection has arbitrary order.
        keys = [key for key in params if key in self.func_dict]
        # formatter URL must go before example
        if 'formatter URL' in keys and 'example' in keys:
            keys.remove('formatter URL')
            keys.insert(0, 'formatter URL')

        clear_params = []
        for key in keys:
            param = textlib.removeDisabledParts(params[key])
            if param in ('', '-'):
                continue
            pywikibot.output('Found param "{}"'.format(key))
            try:
                remove = self.func_dict[key](param)
            except pywikibot.data.api.APIError as exc:
                # Best effort: leave the parameter on the page, but do not
                # hide the failure from the operator.
                pywikibot.warning(
                    'Processing param "{}" failed: {}'.format(key, exc))
                continue
            if remove:
                clear_params.append(key)
        if self.getOption('importonly'):
            return

        for par in clear_params:
            # keep_field=True blanks the value but keeps the parameter name
            template.remove(par, keep_field=True)
        obsolete = set(self.obsolete_params)
        for par in params:
            if par in obsolete:
                template.remove(par)

        self.current_page = self.current_talk_page
        self.put_current(str(code), show_diff=True,
                         summary='removing migrated/obsolete parameters')
    def _order_differs(self, func):
        """Assert nested-template cases without relying on result order.

        :param func: extractor under test; called with wikitext and
            expected to return a list of (name, OrderedDict) tuples
        """
        cases = (
            ('{{a|b={{c}}}}',
             [('a', OrderedDict([('b', '{{c}}')])),
              ('c', OrderedDict())]),
            ('{{a|{{c|d}}}}',
             [('c', OrderedDict([('1', 'd')])),
              ('a', OrderedDict([('1', '{{c|d}}')]))]),
            # inner '}' after {{b|c}} should be treated as wikitext
            ('{{a|{{b|c}}}|d}}',
             [('a', OrderedDict([('1', '{{b|c}}}'), ('2', u'd')])),
              ('b', OrderedDict([('1', 'c')]))]),
        )
        for wikitext, expected in cases:
            self.assertCountEqual(func(wikitext), expected)
Example #6
0
    def test_extract_templates_params_mwpfh(self):
        """Check the extractor that is backed by mwparserfromhell."""
        extract = textlib.extract_templates_and_params_mwpfh
        self._common_results(extract)
        self._order_differs(extract)
        self._etp_regex_differs(extract)

        # Three levels of nesting, without and with an empty parameter on
        # the innermost template; result order is not significant.
        nested_cases = (
            ('{{a|{{c|{{d}}}}}}',
             [('c', OrderedDict([('1', '{{d}}')])),
              ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
              ('d', OrderedDict())]),
            ('{{a|{{c|{{d|}}}}}}',
             [('c', OrderedDict([('1', '{{d|}}')])),
              ('a', OrderedDict([('1', '{{c|{{d|}}}}')])),
              ('d', OrderedDict([('1', '')]))]),
        )
        for wikitext, expected in nested_cases:
            self.assertCountEqual(extract(wikitext), expected)
    def test_template_simple_regex(self):
        """Check the extractor that is built on the simple regex."""
        extract = textlib.extract_templates_and_params_regex_simple
        self._common_results(extract)
        self._etp_regex_differs(extract)

        cases = (
            # The simple regex copies the whitespace of mwpfh, but does
            # not have additional entries for nested templates.
            ('{{a| b={{c}}}}', [('a', OrderedDict([(' b', '{{c}}')]))]),
            ('{{a|b={{c}}}}', [('a', OrderedDict([('b', '{{c}}')]))]),
            ('{{a|b= {{c}}}}', [('a', OrderedDict([('b', ' {{c}}')]))]),
            ('{{a|b={{c}} }}', [('a', OrderedDict([('b', '{{c}} ')]))]),
            # The next three are modelled on _order_differs; the first
            # one works ...
            ('{{a|{{c}} }}', [('a', OrderedDict([('1', '{{c}} ')]))]),
            # ... but an inner '|' causes
            # extract_templates_and_params_regex_simple to split the
            # arguments incorrectly in the next two cases.
            ('{{a|{{c|d}} }}',
             [('a', OrderedDict([('1', '{{c'), ('2', 'd}} ')]))]),
            ('{{a|{{b|c}}}|d}}',
             [(u'a', OrderedDict([('1', u'{{b'), ('2', u'c}}}'),
                                  ('3', u'd')]))]),
            # Safe fallback to handle arbitrary template levels
            # by merging top level templates together.
            # i.e. 'b' is not recognised as a template, and 'foo' is also
            # consumed as part of 'a'.
            ('{{a|{{c|{{d|{{e|}}}} }} }} foo {{b}}',
             [(None, OrderedDict())]),
        )
        for wikitext, expected in cases:
            self.assertEqual(extract(wikitext), expected)
    def test_extract_templates_params_mwpfh(self):
        """Check the mwparserfromhell extractor; skip if it is missing."""
        # The module name is bound to the ImportError instance when the
        # import failed, which is what this guard detects.
        if isinstance(mwparserfromhell, ImportError):
            raise unittest.SkipTest('mwparserfromhell not available')

        extract = textlib.extract_templates_and_params_mwpfh
        self._common_results(extract)
        self._order_differs(extract)
        self._etp_regex_differs(extract)

        # Three levels of nesting, without and with an empty parameter on
        # the innermost template; result order is not significant.
        nested_cases = (
            ('{{a|{{c|{{d}}}}}}',
             [('c', OrderedDict([('1', '{{d}}')])),
              ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
              ('d', OrderedDict())]),
            ('{{a|{{c|{{d|}}}}}}',
             [('c', OrderedDict([('1', '{{d|}}')])),
              ('a', OrderedDict([('1', '{{c|{{d|}}}}')])),
              ('d', OrderedDict([('1', '')]))]),
        )
        for wikitext, expected in nested_cases:
            self.assertCountEqual(extract(wikitext), expected)
    def _common_results(self, func):
        """Assert the results that every extractor is expected to share.

        :param func: extractor under test; called with wikitext and
            expected to return a list of (name, OrderedDict) tuples
        """
        def tpl(name, *pairs):
            """Build one expected (name, parameters) entry."""
            return name, OrderedDict(pairs)

        cases = (
            # whitespace around the template name
            ('{{a}}', [tpl('a')]),
            ('{{ a}}', [tpl('a')]),
            ('{{a }}', [tpl('a')]),
            ('{{ a }}', [tpl('a')]),

            # named, positional and numbered parameters
            ('{{a|b=c}}', [tpl('a', ('b', 'c'))]),
            ('{{a|b|c=d}}', [tpl('a', ('1', 'b'), ('c', 'd'))]),
            ('{{a|b=c|f=g|d=e|1=}}',
             [tpl('a', ('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))]),
            ('{{a|1=2|c=d}}', [tpl('a', ('1', '2'), ('c', 'd'))]),
            ('{{a|c=d|1=2}}', [tpl('a', ('c', 'd'), ('1', '2'))]),
            ('{{a|5=d|a=b}}', [tpl('a', ('5', 'd'), ('a', 'b'))]),
            ('{{a|=2}}', [tpl('a', ('', '2'))]),

            # empty parameters
            ('{{a|}}', [tpl('a', ('1', ''))]),
            ('{{a|=|}}', [tpl('a', ('', ''), ('1', ''))]),
            ('{{a||}}', [tpl('a', ('1', ''), ('2', ''))]),

            # triple-brace arguments inside a value
            ('{{a|b={{{1}}}}}', [tpl('a', ('b', '{{{1}}}'))]),
            ('{{a|b=<noinclude>{{{1}}}</noinclude>}}',
             [tpl('a', ('b', '<noinclude>{{{1}}}</noinclude>'))]),

            # prefixes are kept as part of the template name
            ('{{subst:a|b=c}}', [tpl('subst:a', ('b', 'c'))]),
            ('{{safesubst:a|b=c}}', [tpl('safesubst:a', ('b', 'c'))]),
            ('{{msgnw:a|b=c}}', [tpl('msgnw:a', ('b', 'c'))]),
            ('{{Template:a|b=c}}', [tpl('Template:a', ('b', 'c'))]),
            ('{{template:a|b=c}}', [tpl('template:a', ('b', 'c'))]),
            ('{{:a|b=c}}', [tpl(':a', ('b', 'c'))]),
            ('{{subst::a|b=c}}', [tpl('subst::a', ('b', 'c'))]),

            # several arguments, several templates
            ('{{a|b={{{1}}}|c={{{2}}}}}',
             [tpl('a', ('b', '{{{1}}}'), ('c', '{{{2}}}'))]),
            ('{{a|b=c}}{{d|e=f}}',
             [tpl('a', ('b', 'c')), tpl('d', ('e', 'f'))]),

            # a comment inside a value is returned unchanged
            ('{{a|b=<!--{{{1}}}-->}}',
             [tpl('a', ('b', '<!--{{{1}}}-->'))]),

            # initial '{' and '}' should be ignored as outer wikitext
            ('{{{a|b}}X}', [tpl('a', ('1', 'b'))]),

            # sf.net bug 1575: unclosed template
            ('{{a', []),
            ('{{a}}{{foo|', [tpl('a')]),
        )
        for wikitext, expected in cases:
            self.assertEqual(func(wikitext), expected)
Example #10
0
    def test_extract_templates_params_regex(self):
        """Check the regex extractor, including its known deviations."""
        extract = textlib.extract_templates_and_params_regex
        self._extract_templates_params(extract)

        # FIXME: this is a bug
        self.assertEqual(extract('{{a|}}'), [])

        # Whitespace next to the parameter name or value is not expected
        # in the result.
        for wikitext in ('{{a| b=c}}', '{{a|b =c}}',
                         '{{a|b= c}}', '{{a|b=c }}'):
            self.assertEqual(extract(wikitext),
                             [('a', OrderedDict([('b', 'c')]))])

        # A nested template is listed before the template containing it.
        for wikitext in ('{{a| b={{c}}}}', '{{a|b={{c}}}}',
                         '{{a|b= {{c}}}}', '{{a|b={{c}} }}'):
            self.assertEqual(extract(wikitext),
                             [('c', OrderedDict()),
                              ('a', OrderedDict([('b', '{{c}}')]))])

        # The commented-out value is expected to come back empty.
        self.assertEqual(extract('{{a|b=<!--{{{1}}}-->}}'),
                         [('a', OrderedDict([('b', '')]))])
Example #11
0
    def test_extract_templates_params_mwpfh(self):
        """Check the mwparserfromhell extractor; skip if it is missing."""
        try:
            import mwparserfromhell  # noqa
        except ImportError:
            raise unittest.SkipTest('mwparserfromhell not available')

        extract = textlib.extract_templates_and_params_mwpfh
        self._extract_templates_params(extract)

        nested = ('c', OrderedDict())
        cases = (
            ('{{a|}}', [('a', OrderedDict([('1', '')]))]),
            # whitespace around the name and the value is kept verbatim
            ('{{a| b=c}}', [('a', OrderedDict([(' b', 'c')]))]),
            ('{{a|b =c}}', [('a', OrderedDict([('b ', 'c')]))]),
            ('{{a|b= c}}', [('a', OrderedDict([('b', ' c')]))]),
            ('{{a|b=c }}', [('a', OrderedDict([('b', 'c ')]))]),
            # the outer template is listed before the nested one
            ('{{a| b={{c}}}}',
             [('a', OrderedDict([(' b', '{{c}}')])), nested]),
            ('{{a|b={{c}}}}',
             [('a', OrderedDict([('b', '{{c}}')])), nested]),
            ('{{a|b= {{c}}}}',
             [('a', OrderedDict([('b', ' {{c}}')])), nested]),
            ('{{a|b={{c}} }}',
             [('a', OrderedDict([('b', '{{c}} ')])), nested]),
            # a comment inside a value is returned unchanged
            ('{{a|b=<!--{{{1}}}-->}}',
             [('a', OrderedDict([('b', '<!--{{{1}}}-->')]))]),
        )
        for wikitext, expected in cases:
            self.assertEqual(extract(wikitext), expected)
Example #12
0
 def _extract_templates_params(self, func):
     """Assert the results expected from any extractor passed in.

     :param func: extractor under test; called with wikitext and
         expected to return a list of (name, OrderedDict) tuples
     """
     # Whitespace around the template name is not part of the name.
     self.assertEqual(func('{{a}}'), [('a', OrderedDict())])
     self.assertEqual(func('{{ a}}'), [('a', OrderedDict())])
     self.assertEqual(func('{{a }}'), [('a', OrderedDict())])
     self.assertEqual(func('{{ a }}'), [('a', OrderedDict())])
     # Named parameters keep their name; unnamed ones are numbered from 1.
     self.assertEqual(func('{{a|b=c}}'), [('a', OrderedDict(
         (('b', 'c'), )))])
     self.assertEqual(func('{{a|b|c=d}}'),
                      [('a', OrderedDict((('1', 'b'), ('c', 'd'))))])
     # Parameter order in the OrderedDict follows the wikitext.
     self.assertEqual(
         func('{{a|b=c|f=g|d=e|1=}}'),
         [('a', OrderedDict(
             (('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))))])
     self.assertEqual(func('{{a|1=2|c=d}}'),
                      [('a', OrderedDict((('1', '2'), ('c', 'd'))))])
     self.assertEqual(func('{{a|c=d|1=2}}'),
                      [('a', OrderedDict((('c', 'd'), ('1', '2'))))])
     self.assertEqual(func('{{a|5=d|a=b}}'),
                      [('a', OrderedDict((('5', 'd'), ('a', 'b'))))])
     # An empty parameter name is kept as the key ''.
     self.assertEqual(func('{{a|=2}}'), [('a', OrderedDict((('', '2'), )))])
     self.assertEqual(func('{{a|=|}}'),
                      [('a', OrderedDict((('', ''), ('1', ''))))])
     self.assertEqual(func('{{a||}}'),
                      [('a', OrderedDict((('1', ''), ('2', ''))))])
     # Triple-brace arguments inside a value are returned verbatim.
     self.assertEqual(func('{{a|b={{{1}}}}}'),
                      [('a', OrderedDict((('b', '{{{1}}}'), )))])
     self.assertEqual(
         func('{{a|b=<noinclude>{{{1}}}</noinclude>}}'),
         [('a', OrderedDict((('b', '<noinclude>{{{1}}}</noinclude>'), )))])
     # Prefixes such as subst: or Template: stay part of the name.
     self.assertEqual(func('{{subst:a|b=c}}'),
                      [('subst:a', OrderedDict((('b', 'c'), )))])
     self.assertEqual(func('{{safesubst:a|b=c}}'),
                      [('safesubst:a', OrderedDict((('b', 'c'), )))])
     self.assertEqual(func('{{msgnw:a|b=c}}'),
                      [('msgnw:a', OrderedDict((('b', 'c'), )))])
     self.assertEqual(func('{{Template:a|b=c}}'),
                      [('Template:a', OrderedDict((('b', 'c'), )))])
     self.assertEqual(func('{{template:a|b=c}}'),
                      [('template:a', OrderedDict((('b', 'c'), )))])
     self.assertEqual(func('{{:a|b=c}}'), [(':a', OrderedDict(
         (('b', 'c'), )))])
     self.assertEqual(func('{{subst::a|b=c}}'),
                      [('subst::a', OrderedDict((('b', 'c'), )))])