Esempio n. 1
0
def test_more_complicated_page():
    """Check ``xml_to_dict`` on a page with several nested revision fields.

    The expected value maps the self-closing ``<minor />`` tag to an empty
    string and holds the ``<text>`` body without its trailing whitespace.
    """
    page_markup = """
<page>
    <title>free</title>
    <id>19</id>
    <revision>
      <id>54930541</id>
      <minor />
      <comment>move lang= to 1= in {{rfquotek}} (2)</comment>
      <text xml:space="preserve">{{also|-free}}
      </text>
      <sha1>a27crxygaj92zlcfgeattj7pmfh7kxk</sha1>
    </revision>
  </page>
  """
    # Build the expected structure inside-out so each level is named.
    revision_fields = {
        'id': '54930541',
        'minor': '',
        'comment': 'move lang= to 1= in {{rfquotek}} (2)',
        'text': '{{also|-free}}',
        'sha1': 'a27crxygaj92zlcfgeattj7pmfh7kxk',
    }
    page_fields = {
        'title': 'free',
        'id': '19',
        'revision': revision_fields,
    }
    parsed = xml_to_dict(page_markup)
    assert parsed == {'page': page_fields}
Esempio n. 2
0
def test_xml_parser_two_levels():
    """Check that sibling children of the root end up in one nested dict.

    The expected ``'revision'`` value is the element text without the
    whitespace that surrounds it in the markup.
    """
    markup = """
        <page>
            <title>free</title>
            <revision>
                rev
            </revision>
        </page>
    """
    page_fields = {'title': 'free', 'revision': 'rev'}
    parsed = xml_to_dict(markup)
    assert parsed == {'page': page_fields}
Esempio n. 3
0
def test_xml_parser_more_levels_empty_text():
    """Check a self-closing ``<text>`` (with an attribute) nested two deep.

    The expected value for the empty element is an empty string.
    """
    markup = """
        <page>
            <title>free</title>
            <revision>
                <text xml:space="preserve" />
            </revision>
        </page>
    """
    revision_fields = {'text': ''}
    page_fields = {'title': 'free', 'revision': revision_fields}
    parsed = xml_to_dict(markup)
    assert parsed == {'page': page_fields}
Esempio n. 4
0
def parse_page(page):
    """Turn one ``<page>`` XML fragment into its title and parsed wikitext.

    Args:
        page: XML source handed to ``xml_to_dict``. The parsed result must
            contain ``['page']['title']`` and ``['page']['revision']['text']``;
            a missing entry propagates the lookup error unchanged.

    Returns:
        A dict ``{'title': ..., 'text': ...}``. ``'text'`` is an empty dict
        when the revision text is empty, otherwise whatever
        ``parse_wikitext`` returns for it. If that result has an ``'IPA'``
        entry, an ``'ms'`` entry holding
        ``pronunciation_to_numbers(result['IPA'])`` is added to it.
    """
    fields = xml_to_dict(page)['page']
    wikitext = fields['revision']['text']

    # Empty revision text is not sent to the wikitext parser at all.
    # NOTE(review): the meaning of the 'IPA', 'en', 'File' arguments is
    # defined by parse_wikitext, which is not visible here.
    text_d = parse_wikitext(wikitext, 'IPA', 'en', 'File') if wikitext else {}

    if 'IPA' in text_d:
        text_d['ms'] = pronunciation_to_numbers(text_d['IPA'])
    return {'title': fields['title'], 'text': text_d}
Esempio n. 5
0
def test_xml_parser_three_levels():
    """Check a three-deep chain ``page`` > ``revision`` > ``text``.

    The expected text value carries none of the newlines or indentation
    that surround it inside the ``<text>`` element.
    """
    markup = """
        <page>
            <revision>
                <text xml:space="preserve">
                    this is text.
                </text>
            </revision>
        </page>
    """
    revision_fields = {'text': 'this is text.'}
    parsed = xml_to_dict(markup)
    assert parsed == {'page': {'revision': revision_fields}}
Esempio n. 6
0
def test_xml_parser_empty_simpler():
    """Check both self-closing spellings of an empty root element."""
    # With and without a space before the slash; same expectation for both.
    for markup in ("<page/>", "<page />"):
        assert xml_to_dict(markup) == {'page': ''}
Esempio n. 7
0
def test_xml_parser_empty():
    """Check an open/close tag pair with no content between them."""
    parsed = xml_to_dict("<page></page>")
    assert parsed == {'page': ''}
Esempio n. 8
0
def test_xml_parser_one_level_with_attr():
    """Check that an attribute on the tag does not appear in the result."""
    parsed = xml_to_dict("<page some=prop>hello</page>")
    assert parsed == {'page': 'hello'}
Esempio n. 9
0
def test_xml_parser_one_level():
    """Check a single element holding plain text."""
    parsed = xml_to_dict("<page>hello</page>")
    assert parsed == {'page': 'hello'}