def test_parse(self):
    """Check that ``dump_db._parse`` turns an article page into a 'page' record.

    The record is expected to carry the UTF-8 encoded title and a
    zlib-compressed pickle whose first paragraph holds the markup-free
    text together with one ``(title, anchor text, start, end)`` tuple
    per wiki link.
    """
    expected_links = [
        ('Sovereign state', 'sovereign', 11, 20),
        ('Island country', 'island nation', 21, 34),
        ('East Asia', 'East Asia', 38, 47),
    ]
    article = WikiPage(
        'Japan',
        'en',
        "'''Japan''' is a [[Sovereign state|sovereign]] [[island country|island nation]] in [[East Asia]]",
        None,
    )

    parsed = dump_db._parse(article)

    # First element tags the record type; second holds (key, value).
    eq_('page', parsed[0])
    eq_(b'Japan', parsed[1][0])

    # The value is a compressed pickle; drill down to the first paragraph.
    raw_blob = zlib.decompress(parsed[1][1])
    unpickled = pickle.loads(raw_blob)
    first_paragraph = unpickled[0][0]

    eq_('Japan is a sovereign island nation in East Asia', first_paragraph[0])
    eq_(expected_links, first_paragraph[1])
def test_parse_redirect(self):
    """Check that ``dump_db._parse`` turns a redirect page into a 'redirect' record.

    The record is expected to map the UTF-8 encoded source title to the
    encoded redirect target.
    """
    source_title = '日本'
    redirect_page = WikiPage(source_title, 'en', '#REDIRECT [[Japan]]', 'Japan')

    parsed = dump_db._parse(redirect_page)

    # First element tags the record type; second holds (source, target).
    eq_('redirect', parsed[0])
    eq_(source_title.encode('utf-8'), parsed[1][0])
    eq_(b'Japan', parsed[1][1])