def replace(instance: WikiTextHtml, wikitext: wikitextparser.WikiText):
    """Rewrite every titled section of ``wikitext`` as an HTML heading.

    A titled section is replaced by an ``<hN>`` element (``N`` being the
    section level) that holds an anchor link and a ``mw-headline`` span,
    followed by the section's contents. When several sections share a
    slug, the topmost keeps the bare slug and the later ones get a
    ``_<n>`` suffix. Untitled sections are reduced to their contents.
    ``instance`` is not used by this function.
    """
    # The sections are rewritten bottom-up, yet the numbering has to run
    # top-down, so the occurrences of every slug are counted up front.
    remaining = defaultdict(int)  # type: Dict[str, int]
    for section in wikitext.get_sections():
        if section and section.title:
            remaining[_slugify(section.title)] += 1

    for section in reversed(wikitext.get_sections()):
        if not section:
            continue

        heading = ""
        if section.title:
            anchor = _slugify(section.title)
            count = remaining[anchor]
            if count != 1:
                # Not the topmost occurrence: consume one and use its
                # position among the duplicates as the suffix.
                remaining[anchor] = count - 1
                anchor = f"{anchor}_{count}"

            title = section.title.strip()
            heading = "".join(
                (
                    f"<h{section.level}>",
                    f'<a class="anchor" href="#{anchor}"></a>',
                    f'<span class="mw-headline" id="{anchor}">{title}</span>',
                    f"</h{section.level}>\n",
                )
            )

        section.string = heading + section.contents
def test_last_arg_last_char_is_newline():
    """Check that pformat leaves out comment_indent where it has no effect."""
    # (wikitext source, expected pformat output)
    cases = (
        (
            '{{text|{{#if:}}\n}}',
            '{{text\n |{{#if:\n \n }}\n}}',
        ),
        (
            '{{text|{{text|{{#if:}}\n}}\n}}',
            '{{text\n'
            ' |{{text\n'
            ' |{{#if:\n'
            ' \n'
            ' }}\n'
            '<!--\n'
            ' -->}}\n'
            '}}',
        ),
        (
            '{{text|{{text|{{#if:}}\n }}\n}}',
            '{{text\n'
            ' |{{text\n'
            ' |{{#if:\n'
            ' \n'
            ' }}\n'
            ' }}\n'
            '}}',
        ),
        (
            '{{text|a\n |b\n}}',
            '{{text\n |a\n |b\n}}',
        ),
        (
            '{{text|a\n |2=b\n}}',
            '{{text\n |a\n | 2 = b\n}}',
        ),
    )
    for source, expected in cases:
        assert expected == WikiText(source).pformat()

    # The last case goes through parse() instead of the WikiText constructor.
    expected_prefixed = (
        '{{en:text\n'
        ' | n=v\n'
        '}}'
    )
    assert expected_prefixed == parse('{{en:text|n=v\n}}').pformat()
def test_wikilink_in_template(self):
    """The first template must span the whole text, wikilink included."""
    # todo: merge with test_spans?
    template_text = "{{text |[[A|}}]]}}"

    bare = WikiText(template_text).templates[0]
    self.assertEqual(template_text, str(bare))

    # Same template, this time wrapped in a <ref> tag.
    in_ref = WikiText('<ref>{{text |[[A|}}]]}}</ref>').templates[0]
    self.assertEqual(template_text, str(in_ref))
def test_rm_start_not_equal_to_self_start():
    """_close_subspans(5, 6) must leave the [1, 6] span untouched."""
    parsed = WikiText('t{{a}}')
    parsed._type_to_spans = {'Templates': [[1, 6]]}
    # noinspection PyProtectedMember
    parsed._close_subspans(5, 6)
    expected_spans = {'Templates': [[1, 6]]}
    # noinspection PyProtectedMember
    assert parsed._type_to_spans == expected_spans
def test_double_space_indent(self):
    """pformat must honour the indent string it is given."""
    template = WikiText("{{a|b=b|c=c|d=d|e=e}}")
    expected = '{{a\n | b = b\n | c = c\n | d = d\n | e = e\n}}'
    self.assertEqual(expected, template.pformat(' '))
def test_template_name_cannot_be_empty(self):
    """A ``{{_...}}`` construct must not be recorded as a template span."""

    def template_spans(text):
        # Spans recorded under the 'Template' key after parsing ``text``.
        return WikiText(text)._type_to_spans['Template']

    self.assertEqual(template_spans('{{_}}'), [])
    self.assertEqual(template_spans('{{_|text}}'), [])
    self.assertEqual(len(template_spans('{{text| {{_}} }}')), 1)
    self.assertEqual(len(template_spans('{{ {{_|text}} | a }}')), 0)
    self.assertEqual(len(template_spans('{{a{{_|text}} | a }}')), 0)
def test_last_arg_last_char_is_newline(self):
    """Check that pformat leaves out comment_indent where it has no effect."""
    # (expected pformat output, wikitext source)
    cases = (
        (
            '{{text\n |{{#if:\n \n }}\n}}',
            '{{text|{{#if:}}\n}}',
        ),
        (
            '{{text\n'
            ' |{{text\n'
            ' |{{#if:\n'
            ' \n'
            ' }}\n'
            '<!--\n'
            ' -->}}\n'
            '}}',
            '{{text|{{text|{{#if:}}\n}}\n}}',
        ),
        (
            '{{text\n'
            ' |{{text\n'
            ' |{{#if:\n'
            ' \n'
            ' }}\n'
            ' }}\n'
            '}}',
            '{{text|{{text|{{#if:}}\n }}\n}}',
        ),
        (
            '{{text\n |a\n |b\n}}',
            '{{text|a\n |b\n}}',
        ),
        (
            '{{text\n |a\n | 2 = b\n}}',
            '{{text|a\n |2=b\n}}',
        ),
    )
    for expected, source in cases:
        self.assertEqual(expected, WikiText(source).pformat())

    # The last case goes through parse() instead of the WikiText constructor.
    expected_prefixed = (
        '{{en:text\n'
        ' | n=v\n'
        '}}'
    )
    self.assertEqual(
        expected_prefixed, parse('{{en:text|n=v\n}}').pformat())
def test_repformat():
    """Calling pformat twice must give the same output (no mutation)."""
    outer, _middle, _inner = WikiText('{{a|{{b|{{c}}}}}}').templates
    expected = '{{a\n | 1 = {{b\n | 1 = {{c}}\n }}\n}}'
    # The second pass would differ if the first one had altered ``outer``.
    for _ in range(2):
        assert expected == outer.pformat()
def test_template_name_cannot_be_empty():
    """A ``{{_...}}`` construct must not be recorded as a template span."""

    def template_spans(text):
        # Spans recorded under the 'Template' key after parsing ``text``.
        return WikiText(text)._type_to_spans['Template']

    assert template_spans('{{_}}') == []
    assert template_spans('{{_|text}}') == []
    assert len(template_spans('{{text| {{_}} }}')) == 1
    assert len(template_spans('{{ {{_|text}} | a }}')) == 0
    assert len(template_spans('{{a{{_|text}} | a }}')) == 0
def test_external_link_match_is_not_in_spans(self):
    """Links fetched before an insert must still read correctly after it."""
    parsed = WikiText('t [http://b.b b] t [http://c.c c] t')
    # calculate the links
    links_before = parsed.external_links
    parsed.insert(0, 't [http://a.a a]')
    links_after = parsed.external_links

    second_old_link = links_before[1]
    first_new_link = links_after[0]
    self.assertEqual(second_old_link.string, '[http://c.c c]')
    self.assertEqual(first_new_link.string, '[http://a.a a]')
def test_repformat(self):
    """Calling pformat twice must give the same output (no mutation)."""
    outer, _middle, _inner = WikiText('{{a|{{b|{{c}}}}}}').templates
    expected = '{{a\n | 1 = {{b\n | 1 = {{c}}\n }}\n}}'
    # The second pass would differ if the first one had altered ``outer``.
    for _ in range(2):
        self.assertEqual(expected, outer.pformat())
def test_spans_are_closed_properly(self):
    """_close_subspans(16, 27) must drop the [16, 25] span."""
    # Real example this was reduced from:
    #   pformat() of WikiText('{{text|1={{#if:|}}\n\n}}') is expected to be
    #   '{{text\n | 1 = {{#if:\n \n | \n }}\n}}'
    parsed = WikiText('')
    parsed._type_to_spans = {'ParserFunction': [[16, 25]]}
    parsed._close_subspans(16, 27)
    leftover = parsed._type_to_spans['ParserFunction']
    self.assertFalse(leftover)
def test_a_b_from_different_objects(self):
    """Containment must only hold between objects of the same parse.

    ``{{a}}`` is nested in ``{{b|...}}`` in both parses, but a template
    from one WikiText object must not be reported as contained in a
    template from the other. Plain strings are checked as well.
    """
    s = '{{b|{{a}}}}'
    b1, a1 = WikiText(s).templates
    b2, a2 = WikiText(s).templates
    # assertIn/assertNotIn report both operands on failure, unlike
    # assertTrue(x in y), which only reports "False is not true".
    self.assertIn(a1, b1)
    self.assertIn(a2, b2)
    self.assertNotIn(a2, b1)
    self.assertNotIn(a1, b2)
    self.assertIn('{{a}}', b1)
    self.assertNotIn('{{c}}', b2)
def test_a_b_from_different_objects():
    """Containment must only hold between objects of the same parse."""
    source = '{{b|{{a}}}}'
    outer_first, inner_first = WikiText(source).templates
    outer_second, inner_second = WikiText(source).templates

    # Each inner template is inside the outer one of its own parse ...
    assert inner_first in outer_first
    assert inner_second in outer_second
    # ... but not inside the outer template of the other parse.
    assert inner_second not in outer_first
    assert inner_first not in outer_second

    # Plain strings are accepted on the left-hand side too.
    assert '{{a}}' in outer_first
    assert '{{c}}' not in outer_second
def test_a_b_from_different_objects(self):
    """Containment must only hold between objects of the same parse."""
    source = '{{b|{{a}}}}'
    outer_first, inner_first = WikiText(source).templates
    outer_second, inner_second = WikiText(source).templates

    # Each inner template is inside the outer one of its own parse ...
    self.assertIn(inner_first, outer_first)
    self.assertIn(inner_second, outer_second)
    # ... but not inside the outer template of the other parse.
    self.assertNotIn(inner_second, outer_first)
    self.assertNotIn(inner_first, outer_second)

    # Plain strings are accepted as the member too.
    self.assertIn('{{a}}', outer_first)
    self.assertNotIn('{{c}}', outer_second)
def test_spans_are_closed_properly():
    """_close_subspans(16, 27) must drop the [16, 25, ...] span."""
    # Real example this was reduced from:
    #   pformat() of WikiText('{{text|1={{#if:|}}\n\n}}') is expected to be
    #   '{{text\n | 1 = {{#if:\n \n | \n }}\n}}'
    parsed = WikiText('')
    parsed._type_to_spans = {'ParserFunction': [[16, 25, None, None]]}
    # noinspection PyProtectedMember
    parsed._close_subspans(16, 27)
    # noinspection PyProtectedMember
    leftover = parsed._type_to_spans['ParserFunction']
    assert not leftover
def test_bare_link(self):
    """A bare URL with an upper-case scheme is found as an external link."""
    parsed = WikiText('text1 HTTP://mediawiki.org text2')
    first_link = parsed.external_links[0]
    self.assertEqual('HTTP://mediawiki.org', str(first_link))
def test_getitem(self):
    """Indexing and slicing a template act on its own string."""
    _outer, inner = WikiText('{{t1|{{t2}}}}').templates
    check = self.assertEqual
    # ``inner`` is '{{t2}}'; cover positive and negative index and slice.
    check(inner[2], 't')
    check(inner[2:4], 't2')
    check(inner[-4:-2], 't2')
    check(inner[-3], '2')
def test_a_seems_to_be_in_b_but_in_another_span(self):
    """Equal text at another position must not count as contained.

    The source holds ``{{a}}`` twice: once nested in ``{{b|...}}`` and
    once after it. Only the nested occurrence is inside ``b``, and the
    two ``{{a}}`` templates do not contain each other.
    """
    s = '{{b|{{a}}}}{{a}}'
    b, a1, a2 = WikiText(s).templates
    # assertIn/assertNotIn report both operands on failure, unlike
    # assertTrue(x in y), which only reports "False is not true".
    self.assertIn(a1, b)
    self.assertNotIn(a2, b)
    self.assertNotIn(a2, a1)
    self.assertNotIn(a1, a2)
def test_parser_function_alias_without_hash_sign(self):
    """A parser function alias without ``#`` must still be detected.

    `آرایشعدد` is an alias for `formatnum` on Persian Wikipedia.
    See: //translatewiki.net/wiki/MediaWiki:Sp-translate-data-MagicWords/fa
    """
    found = WikiText("{{آرایشعدد:text|R}}").parser_functions
    self.assertEqual(1, len(found))
def test_call(self):
    """Calling a template with start/stop returns that part of its string."""
    _outer, inner = WikiText('{{t1|{{t2}}}}').templates
    # ``inner`` is '{{t2}}'; cover one- and two-argument calls, both signs.
    self.assertEqual(inner(2), 't')
    self.assertEqual(inner(2, 4), 't2')
    self.assertEqual(inner(-4, -2), 't2')
    self.assertEqual(inner(-3), '2')
def test_attr_contains_template_newline_invalid_chars(self):
    """get_attr must return a value holding a template with a newline."""
    source = (
        ' {| class=wikitable |ب style="color: {{text| 1 =\n'
        'red}};"\n'
        '| cell\n'
        '|}\n'
    )
    first_table = WikiText(source).tables[0]
    style = first_table.get_attr('style')
    self.assertEqual(style, 'color: {{text| 1 =\nred}};')
def test_unicode_parameters():
    """Parameter spans must be correct for non-ASCII names and defaults."""
    spans = WikiText(
        '{{{پارا۱|{{{پارا۲|پيشفرض}}}}}}')._type_to_spans['Parameter']
    # Exactly two spans of four items each; only start and end are checked.
    (outer_start, outer_end, _, _), (inner_start, inner_end, _, _) = spans
    assert outer_start == 0
    assert outer_end == 30
    assert inner_start == 9
    assert inner_end == 27
def test_numbered_link(self):
    """A bracketed URL without link text is found as an external link."""
    parsed = WikiText('text1 [http://mediawiki.org] text2')
    first_link = parsed.external_links[0]
    self.assertEqual('[http://mediawiki.org]', str(first_link))
def test_keyword_and_positional_args_removal():
    """Check argument names and values while arguments of t1 are deleted."""
    parsed = WikiText("text{{t1|kw=a|1=|pa|kw2=a|pa2}}{{t2|a|1|1=}}text")
    first, second = parsed.templates

    # Initial state.
    first_args = first.arguments
    second_args = second.arguments
    for index, name in ((2, '1'), (3, 'kw2'), (4, '2')):
        assert name == first_args[index].name
    for index, name in enumerate(('1', '2', '1')):
        assert name == second_args[index].name

    # Delete the first argument of t1 (``|kw=a``), then re-read both lists.
    del first_args[0][:]
    first_args = first.arguments
    second_args = second.arguments
    assert '1' == first_args[0].name
    assert 'kw2' == first_args[2].name
    assert '|pa2' == first_args[3].string
    for index, name in enumerate(('1', '2', '1')):
        assert name == second_args[index].name

    # Delete what is now the second argument of t1 (``|pa``).
    del first_args[1][:]
    first_args = first.arguments
    second_args = second.arguments
    assert "text{{t1|1=|kw2=a|pa2}}{{t2|a|1|1=}}text" == parsed.string
    last_of_first = first_args[2]
    assert 'pa2' == last_of_first.value
    assert '1' == last_of_first.name
    head_of_second = second_args[0]
    assert 'a' == head_of_second.value
    assert '1' == head_of_second.name
def test_a_seems_to_be_in_b_but_in_another_span():
    """Equal text at another position must not count as contained."""
    outer, nested, trailing = WikiText('{{b|{{a}}}}{{a}}').templates
    # Only the nested ``{{a}}`` lies inside ``{{b|...}}``.
    assert nested in outer
    assert trailing not in outer
    # The two ``{{a}}`` templates do not contain each other.
    assert trailing not in nested
    assert nested not in trailing
def test_protocol_relative(self):
    """A bracketed ``//host`` URL is found as an external link."""
    parsed = WikiText('text1 [//en.wikipedia.org wikipedia] text2')
    first_link = parsed.external_links[0]
    self.assertEqual('[//en.wikipedia.org wikipedia]', str(first_link))
def replace(
    instance: WikiTextHtml,
    wikitext: wikitextparser.WikiText,
    function_arguments: List[wikitextparser.Argument],
):
    """Substitute every ``{{{parameter}}}`` in ``wikitext`` in place.

    For each parameter, last to first, the value is chosen in this order:

    1. the result of ``_lookup_named`` for the parameter name;
    2. for a decimal name, the result of ``_lookup_nth_positional``;
    3. the parameter's own default, if it has one;
    4. the literal ``{{{name}}}`` text.

    Values containing fewer than two newlines are stripped of surrounding
    whitespace. Afterwards, any ``[[|...]]`` left in the text is
    percent-escaped. ``instance`` is not used by this function.
    """
    for parameter in reversed(wikitext.parameters):
        # Check if the parameter can be found by name.
        # A position value, like {{{4}}} can also be found by name if the
        # caller used the |4=..| syntax. If this fails, we fall back to
        # search for the 4th positional argument in this example.
        value = _lookup_named(function_arguments, parameter.name)
        # isdecimal() instead of isnumeric(): the latter also accepts
        # characters such as "½" or "²", which int() rejects with a
        # ValueError. Such names now take the default path below.
        if value is None and parameter.name.isdecimal():
            index = int(parameter.name)
            value = _lookup_nth_positional(function_arguments, index)

        if value is None:
            if parameter.default is not None:
                value = parameter.default
            else:
                value = "{{{" + parameter.name + "}}}"

        # Only strip if this was not a multiline value
        if value.count("\n") < 2:
            value = value.strip()

        parameter.string = value

    # It is possible "[[{{{link|}}}|Text]]" is now "[[|Text]]". This is not
    # a valid wikilink, but the | will be picked up by parser-functions. As
    # such, remove the problem by escaping it. For details, see:
    # https://github.com/5j9/wikitextparser/pull/78#issuecomment-714227623
    if "[[|" in wikitext.string:
        wikitext.string = re.sub(
            r"\[\[\|([^\]]*)\]\]", r"%5B%5B%7C\1%5D%5D", wikitext.string
        )
def test_pformat_keep_separated(self):
    """pformat must not turn `{{ {{t}} }}` into `{{{{t}}}}`.

    `{{{{t}}}}` will be interpreted as a parameter with {} around it.
    """
    formatted = WikiText('{{{{t}} }}').pformat()
    self.assertEqual('{{ {{t}} }}', formatted)
def test_keyword_and_positional_args_removal(self):
    """Check argument names and values while arguments of t1 are deleted."""
    check = self.assertEqual
    parsed = WikiText("text{{t1|kw=a|1=|pa|kw2=a|pa2}}{{t2|a|1|1=}}text")
    first, second = parsed.templates

    # Initial state.
    first_args = first.arguments
    second_args = second.arguments
    for index, name in ((2, '1'), (3, 'kw2'), (4, '2')):
        check(name, first_args[index].name)
    for index, name in enumerate(('1', '2', '1')):
        check(name, second_args[index].name)

    # Delete the first argument of t1 (``|kw=a``), then re-read both lists.
    del first_args[0][:]
    first_args = first.arguments
    second_args = second.arguments
    check('1', first_args[0].name)
    check('kw2', first_args[2].name)
    check('|pa2', first_args[3].string)
    for index, name in enumerate(('1', '2', '1')):
        check(name, second_args[index].name)

    # Delete what is now the second argument of t1 (``|pa``).
    del first_args[1][:]
    first_args = first.arguments
    second_args = second.arguments
    check("text{{t1|1=|kw2=a|pa2}}{{t2|a|1|1=}}text", parsed.string)
    last_of_first = first_args[2]
    check('pa2', last_of_first.value)
    check('1', last_of_first.name)
    head_of_second = second_args[0]
    check('a', head_of_second.value)
    check('1', head_of_second.name)