def test_extract_templates_params_regex(self):
    """Check the complex-regex extractor.

    The shared checks and the whitespace cases run with
    ``remove_disabled_parts=False``; the remaining cases call the
    extractor with its default arguments.
    """
    keep_disabled = functools.partial(
        textlib.extract_templates_and_params_regex,
        remove_disabled_parts=False)
    self._common_results(keep_disabled)
    self._order_differs(keep_disabled)

    # FIXME: {} is normal text
    self.assertEqual(keep_disabled('{{a|b={} }}'), [])

    # Every whitespace variant is expected to yield the stripped pair.
    whitespace_variants = (
        '{{a| b=c}}',
        '{{a|b =c}}',
        '{{a|b= c}}',
        '{{a|b=c }}',
    )
    for wikitext in whitespace_variants:
        self.assertEqual(keep_disabled(wikitext),
                         [('a', OrderedDict([('b', 'c')]))])

    default_extract = textlib.extract_templates_and_params_regex
    self.assertEqual(default_extract('{{a|b=<!--{{{1}}}-->}}'),
                     [('a', OrderedDict([('b', '')]))])

    # Identical to mwpfh
    balanced = [
        ('c', OrderedDict([('1', '{{d}}')])),
        ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
        ('d', OrderedDict()),
    ]
    self.assertCountEqual(default_extract('{{a|{{c|{{d}}}}}}'), balanced)

    # However fails to correctly handle three levels of balanced brackets
    # with empty parameters
    with_empty_param = [
        ('c', OrderedDict([('1', '{{d|}}}')])),
        ('d', OrderedDict([('1', '}')])),
    ]
    self.assertCountEqual(default_extract('{{a|{{c|{{d|}}}}}}'),
                          with_empty_param)
def _etp_regex_differs(self, func):
    """Check the common cases that ETP_REGEX does not handle the same way.

    Each input holds a single template ``a`` with one parameter; the
    expected name/value pair keeps the whitespace found in the wikitext.

    :param func: extraction callable under test; receives the wikitext
    """
    expected_pairs = (
        ('{{a| b=c}}', (' b', 'c')),
        ('{{a|b =c}}', ('b ', 'c')),
        ('{{a|b= c}}', ('b', ' c')),
        ('{{a|b=c }}', ('b', 'c ')),
        # inner {} should be treated as part of the value
        ('{{a|b={} }}', ('b', '{} ')),
    )
    for wikitext, pair in expected_pairs:
        self.assertEqual(func(wikitext), [('a', OrderedDict([pair]))])
def treat_property_and_talk(self, prop, page):
    """Process the metadata template found on a property talk page.

    The first template on ``page`` whose name matches
    ``self.template_metadata`` is located. Each of its non-empty
    parameters that has a handler in ``self.func_dict`` is passed to that
    handler (a value of ``-`` is skipped). Unless the ``importonly``
    option is set, parameters whose handler returned a truthy value are
    blanked, parameters listed in ``self.obsolete_params`` are removed,
    and the page is saved through ``self.put_current``.

    :param prop: not used by this method; kept for the caller's interface
    :param page: talk page whose ``text`` is parsed and possibly saved
    """
    self.current_talk_page = page
    # todo: skip sandbox properties
    # todo: removeDisabledParts now?
    code = mwparserfromhell.parse(page.text, skip_style_tags=True)
    for template in code.ifilter_templates():
        if template.name.matches(self.template_metadata):
            break
    else:
        pywikibot.output('Template "{}" not found'.format(
            self.template_metadata))
        return

    # Later duplicates overwrite earlier values but keep the first position.
    params = OrderedDict(
        (str(param.name).strip(), str(param.value).strip())
        for param in template.params)

    # Walk the parameters in template order so that runs are reproducible;
    # iterating a set of strings gives a different order on every run.
    keys = [key for key in params if key in self.func_dict]
    # formatter URL must go before example
    if 'formatter URL' in keys and 'example' in keys:
        keys.remove('formatter URL')
        keys.insert(0, 'formatter URL')

    clear_params = []
    for key in keys:
        param = textlib.removeDisabledParts(params[key])
        if param in ('', '-'):
            continue
        pywikibot.output('Found param "{}"'.format(key))
        try:
            remove = self.func_dict[key](param)
        except pywikibot.data.api.APIError as exc:
            # Best effort: keep the parameter on the page, but say why.
            pywikibot.warning(
                'API error while handling param "{}": {}'.format(key, exc))
            remove = False
        if remove:
            clear_params.append(key)

    if self.getOption('importonly'):
        return

    for par in clear_params:
        # keep_field=True leaves the (now empty) parameter in the template
        template.remove(par, keep_field=True)
    obsolete = set(self.obsolete_params)
    for par in params:
        if par in obsolete:
            template.remove(par)

    self.current_page = self.current_talk_page
    self.put_current(str(code), show_diff=True,
                     summary='removing migrated/obsolete parameters')
def _order_differs(self, func):
    """Check nested-template cases where only the result order may vary.

    Results are compared with ``assertCountEqual``, so the order in which
    the templates are returned is not significant.

    :param func: extraction callable under test; receives the wikitext
    """
    nested_cases = [
        ('{{a|b={{c}}}}',
         [('a', OrderedDict([('b', '{{c}}')])),
          ('c', OrderedDict())]),
        ('{{a|{{c|d}}}}',
         [('c', OrderedDict([('1', 'd')])),
          ('a', OrderedDict([('1', '{{c|d}}')]))]),
        # inner '}' after {{b|c}} should be treated as wikitext
        ('{{a|{{b|c}}}|d}}',
         [('a', OrderedDict([('1', '{{b|c}}}'), ('2', u'd')])),
          ('b', OrderedDict([('1', 'c')]))]),
    ]
    for wikitext, templates in nested_cases:
        self.assertCountEqual(func(wikitext), templates)
def test_extract_templates_params_mwpfh(self):
    """Check the mwparserfromhell-based extractor.

    Runs the three shared helper checks, then two cases with templates
    nested three levels deep.
    """
    func = textlib.extract_templates_and_params_mwpfh
    self._common_results(func)
    self._order_differs(func)
    self._etp_regex_differs(func)

    three_levels = (
        ('{{a|{{c|{{d}}}}}}',
         [('c', OrderedDict([('1', '{{d}}')])),
          ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
          ('d', OrderedDict())]),
        ('{{a|{{c|{{d|}}}}}}',
         [('c', OrderedDict([('1', '{{d|}}')])),
          ('a', OrderedDict([('1', '{{c|{{d|}}}}')])),
          ('d', OrderedDict([('1', '')]))]),
    )
    for wikitext, templates in three_levels:
        self.assertCountEqual(func(wikitext), templates)
def test_template_simple_regex(self):
    """Check the simple-regex extractor.

    Runs two of the shared helper checks, then cases with nested
    templates, which this extractor is not expected to list separately.
    """
    func = textlib.extract_templates_and_params_regex_simple
    self._common_results(func)
    self._etp_regex_differs(func)

    expectations = [
        # The simple regex copies the whitespace of mwpfh, but does
        # not have additional entries for nested templates.
        ('{{a| b={{c}}}}',
         [('a', OrderedDict([(' b', '{{c}}')]))]),
        ('{{a|b={{c}}}}',
         [('a', OrderedDict([('b', '{{c}}')]))]),
        ('{{a|b= {{c}}}}',
         [('a', OrderedDict([('b', ' {{c}}')]))]),
        ('{{a|b={{c}} }}',
         [('a', OrderedDict([('b', '{{c}} ')]))]),
        # These three are from _order_differs, and while the first works
        ('{{a|{{c}} }}',
         [('a', OrderedDict([('1', '{{c}} ')]))]),
        # an inner '|' causes extract_template_and_params_regex_simple to
        # split arguments incorrectly in the next two cases.
        ('{{a|{{c|d}} }}',
         [('a', OrderedDict([('1', '{{c'), ('2', 'd}} ')]))]),
        ('{{a|{{b|c}}}|d}}',
         [(u'a', OrderedDict([('1', u'{{b'),
                              ('2', u'c}}}'),
                              ('3', u'd')]))]),
        # Safe fallback to handle arbitrary template levels
        # by merging top level templates together.
        # i.e. 'b' is not recognised as a template, and 'foo' is also
        # consumed as part of 'a'.
        ('{{a|{{c|{{d|{{e|}}}} }} }} foo {{b}}',
         [(None, OrderedDict())]),
    ]
    for wikitext, templates in expectations:
        self.assertEqual(func(wikitext), templates)
def test_extract_templates_params_mwpfh(self):
    """Check the mwparserfromhell-based extractor.

    The test is skipped when the module-level ``mwparserfromhell`` name
    holds an ``ImportError`` instance instead of the module. Otherwise
    the three shared helper checks run, followed by two cases with
    templates nested three levels deep.
    """
    parser_missing = isinstance(mwparserfromhell, ImportError)
    if parser_missing:
        raise unittest.SkipTest('mwparserfromhell not available')

    func = textlib.extract_templates_and_params_mwpfh
    self._common_results(func)
    self._order_differs(func)
    self._etp_regex_differs(func)

    three_levels = (
        ('{{a|{{c|{{d}}}}}}',
         [('c', OrderedDict([('1', '{{d}}')])),
          ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
          ('d', OrderedDict())]),
        ('{{a|{{c|{{d|}}}}}}',
         [('c', OrderedDict([('1', '{{d|}}')])),
          ('a', OrderedDict([('1', '{{c|{{d|}}}}')])),
          ('d', OrderedDict([('1', '')]))]),
    )
    for wikitext, templates in three_levels:
        self.assertCountEqual(func(wikitext), templates)
def _common_results(self, func):
    """Check the cases every extraction function is expected to share.

    :param func: extraction callable under test; it receives wikitext and
        its result is compared with a list of ``(name, OrderedDict)``
        tuples
    """
    def one(name, *pairs):
        # Expected result for a single template with the given parameters.
        return [(name, OrderedDict(pairs))]

    expectations = [
        # template name with and without surrounding spaces
        ('{{a}}', one('a')),
        ('{{ a}}', one('a')),
        ('{{a }}', one('a')),
        ('{{ a }}', one('a')),
        # named and positional parameters
        ('{{a|b=c}}', one('a', ('b', 'c'))),
        ('{{a|b|c=d}}', one('a', ('1', 'b'), ('c', 'd'))),
        ('{{a|b=c|f=g|d=e|1=}}',
         one('a', ('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))),
        ('{{a|1=2|c=d}}', one('a', ('1', '2'), ('c', 'd'))),
        ('{{a|c=d|1=2}}', one('a', ('c', 'd'), ('1', '2'))),
        ('{{a|5=d|a=b}}', one('a', ('5', 'd'), ('a', 'b'))),
        # empty names and empty values
        ('{{a|=2}}', one('a', ('', '2'))),
        ('{{a|}}', one('a', ('1', ''))),
        ('{{a|=|}}', one('a', ('', ''), ('1', ''))),
        ('{{a||}}', one('a', ('1', ''), ('2', ''))),
        # triple-brace arguments inside a value
        ('{{a|b={{{1}}}}}', one('a', ('b', '{{{1}}}'))),
        ('{{a|b=<noinclude>{{{1}}}</noinclude>}}',
         one('a', ('b', '<noinclude>{{{1}}}</noinclude>'))),
        # prefixes are expected to stay part of the template name
        ('{{subst:a|b=c}}', one('subst:a', ('b', 'c'))),
        ('{{safesubst:a|b=c}}', one('safesubst:a', ('b', 'c'))),
        ('{{msgnw:a|b=c}}', one('msgnw:a', ('b', 'c'))),
        ('{{Template:a|b=c}}', one('Template:a', ('b', 'c'))),
        ('{{template:a|b=c}}', one('template:a', ('b', 'c'))),
        ('{{:a|b=c}}', one(':a', ('b', 'c'))),
        ('{{subst::a|b=c}}', one('subst::a', ('b', 'c'))),
        ('{{a|b={{{1}}}|c={{{2}}}}}',
         one('a', ('b', '{{{1}}}'), ('c', '{{{2}}}'))),
        # two templates in a row
        ('{{a|b=c}}{{d|e=f}}',
         one('a', ('b', 'c')) + one('d', ('e', 'f'))),
        ('{{a|b=<!--{{{1}}}-->}}', one('a', ('b', '<!--{{{1}}}-->'))),
        # initial '{' and '}' should be ignored as outer wikitext
        ('{{{a|b}}X}', one('a', ('1', 'b'))),
        # sf.net bug 1575: unclosed template
        ('{{a', []),
        ('{{a}}{{foo|', one('a')),
    ]
    for wikitext, templates in expectations:
        self.assertEqual(func(wikitext), templates)
def test_extract_templates_params_regex(self):
    """Check the regex-based extractor.

    Runs the shared helper checks, then the cases whose expected result
    is specific to this extractor.
    """
    func = textlib.extract_templates_and_params_regex
    self._extract_templates_params(func)

    # FIXME: this is a bug
    self.assertEqual(func('{{a|}}'), [])

    # Whitespace around the name or value is expected to be dropped.
    stripped = [('a', OrderedDict([('b', 'c')]))]
    for wikitext in ('{{a| b=c}}', '{{a|b =c}}',
                     '{{a|b= c}}', '{{a|b=c }}'):
        self.assertEqual(func(wikitext), stripped)

    # The nested template is expected first, then the outer one.
    nested = [('c', OrderedDict()),
              ('a', OrderedDict([('b', '{{c}}')]))]
    for wikitext in ('{{a| b={{c}}}}', '{{a|b={{c}}}}',
                     '{{a|b= {{c}}}}', '{{a|b={{c}} }}'):
        self.assertEqual(func(wikitext), nested)

    self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'),
                     [('a', OrderedDict([('b', '')]))])
def test_extract_templates_params_mwpfh(self):
    """Check the mwparserfromhell-based extractor.

    The test is skipped when ``mwparserfromhell`` cannot be imported.
    Otherwise the shared helper checks run, followed by the cases whose
    expected result is specific to this extractor.
    """
    try:
        import mwparserfromhell  # noqa
    except ImportError:
        raise unittest.SkipTest('mwparserfromhell not available')

    func = textlib.extract_templates_and_params_mwpfh
    self._extract_templates_params(func)

    no_params = OrderedDict()
    expectations = [
        ('{{a|}}',
         [('a', OrderedDict([('1', '')]))]),
        # whitespace is expected to be kept in names and values
        ('{{a| b=c}}',
         [('a', OrderedDict([(' b', 'c')]))]),
        ('{{a|b =c}}',
         [('a', OrderedDict([('b ', 'c')]))]),
        ('{{a|b= c}}',
         [('a', OrderedDict([('b', ' c')]))]),
        ('{{a|b=c }}',
         [('a', OrderedDict([('b', 'c ')]))]),
        # the outer template is expected first, then the nested one
        ('{{a| b={{c}}}}',
         [('a', OrderedDict([(' b', '{{c}}')])), ('c', no_params)]),
        ('{{a|b={{c}}}}',
         [('a', OrderedDict([('b', '{{c}}')])), ('c', no_params)]),
        ('{{a|b= {{c}}}}',
         [('a', OrderedDict([('b', ' {{c}}')])), ('c', no_params)]),
        ('{{a|b={{c}} }}',
         [('a', OrderedDict([('b', '{{c}} ')])), ('c', no_params)]),
        ('{{a|b=<!--{{{1}}}-->}}',
         [('a', OrderedDict([('b', '<!--{{{1}}}-->')]))]),
    ]
    for wikitext, templates in expectations:
        self.assertEqual(func(wikitext), templates)
def _extract_templates_params(self, func):
    """Check the cases shared by the extraction functions under test.

    :param func: extraction callable under test; it receives wikitext and
        its result is compared with a list of ``(name, OrderedDict)``
        tuples
    """
    def one(name, *pairs):
        # Expected result for a single template with the given parameters.
        return [(name, OrderedDict(pairs))]

    expectations = [
        # template name with and without surrounding spaces
        ('{{a}}', one('a')),
        ('{{ a}}', one('a')),
        ('{{a }}', one('a')),
        ('{{ a }}', one('a')),
        # named and positional parameters
        ('{{a|b=c}}', one('a', ('b', 'c'))),
        ('{{a|b|c=d}}', one('a', ('1', 'b'), ('c', 'd'))),
        ('{{a|b=c|f=g|d=e|1=}}',
         one('a', ('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))),
        ('{{a|1=2|c=d}}', one('a', ('1', '2'), ('c', 'd'))),
        ('{{a|c=d|1=2}}', one('a', ('c', 'd'), ('1', '2'))),
        ('{{a|5=d|a=b}}', one('a', ('5', 'd'), ('a', 'b'))),
        # empty names and empty values
        ('{{a|=2}}', one('a', ('', '2'))),
        ('{{a|=|}}', one('a', ('', ''), ('1', ''))),
        ('{{a||}}', one('a', ('1', ''), ('2', ''))),
        # triple-brace arguments inside a value
        ('{{a|b={{{1}}}}}', one('a', ('b', '{{{1}}}'))),
        ('{{a|b=<noinclude>{{{1}}}</noinclude>}}',
         one('a', ('b', '<noinclude>{{{1}}}</noinclude>'))),
        # prefixes are expected to stay part of the template name
        ('{{subst:a|b=c}}', one('subst:a', ('b', 'c'))),
        ('{{safesubst:a|b=c}}', one('safesubst:a', ('b', 'c'))),
        ('{{msgnw:a|b=c}}', one('msgnw:a', ('b', 'c'))),
        ('{{Template:a|b=c}}', one('Template:a', ('b', 'c'))),
        ('{{template:a|b=c}}', one('template:a', ('b', 'c'))),
        ('{{:a|b=c}}', one(':a', ('b', 'c'))),
        ('{{subst::a|b=c}}', one('subst::a', ('b', 'c'))),
    ]
    for wikitext, templates in expectations:
        self.assertEqual(func(wikitext), templates)