def test(word):
    """Check two ways of splitting `word` against the expected pieces.

    The expected value `split_word` is not a parameter: it is read from the
    surrounding scope (presumably a closure variable -- confirm where this
    helper is defined).
    """
    # Splitting the word directly must give the expected pieces.
    direct_split = split_ascii(word)
    assert_equal(direct_split, split_word)

    # Converting the word, splitting the converted form, and converting each
    # piece back must give the same pieces.
    converted_pieces = split_unicode(convert_ascii(word))
    round_trip = [convert_ascii_reverse(piece) for piece in converted_pieces]
    assert_equal(round_trip, split_word)
def test(unicode_word):
    """Check that converting `unicode_word` back yields the expected word.

    The expected value `ascii_word` is read from the surrounding scope
    (presumably a closure variable -- confirm where this helper is defined).
    """
    converted = convert_ascii_reverse(unicode_word)
    assert_equal(converted, ascii_word)
def test(word):
    """Verify that `word` splits into `split_word` by both code paths.

    `split_word` comes from the enclosing scope rather than the argument
    list (presumably a closure variable -- confirm at the definition site).
    """
    # Path 1: split the word as given.
    assert_equal(split_ascii(word), split_word)

    # Path 2: convert, split the converted form, then map every piece back.
    pieces = split_unicode(convert_ascii(word))
    restored = list(map(convert_ascii_reverse, pieces))
    assert_equal(restored, split_word)
print(text) if len(roots) > 0: add_definitions(text) return sorted(definitions) if __name__ == "__main__": filename = os.path.abspath(os.path.join(os.path.basename(__file__), "../source_material/lexicon.htm")) html = open(filename).read() definitions = extract_definitions(html) check_correctness = False if check_correctness: roots = extract_roots(html) print("found {} roots".format(len(roots))) print("found {} definitions".format(len(definitions))) defined_roots = set(r for (r, d) in definitions) assert len(defined_roots) == len(definitions) undefined_roots = set(r for r in roots if r not in defined_roots) print("defined roots", len(defined_roots)) print("undefined roots", len(undefined_roots)) for r in sorted(undefined_roots): print(r) print(set(defined_roots) - set(roots)) for root, definition in definitions: root = convert_ascii_reverse(root) print("{}|{}".format(root, definition))
# This should be run from the root, i.e.:
#   python extract/convert_tables_yaml.py
#
# Writes data/ca.yaml (a path relative to the current working directory)
# with one entry per combination produced by itertools.product(*ca_order).
import itertools

from ithkuil.grammar import *
from ithkuil.phonology import convert_ascii_reverse

# `itertools` is imported explicitly above so the script no longer depends on
# the wildcard import happening to re-export it.
# `ca_order` and `ca_table` are not defined here; presumably they come from
# the ithkuil.grammar wildcard import -- confirm against that module.
keys = list(itertools.product(*ca_order))

with open('data/ca.yaml', 'w') as f:
    for key in keys:
        # The table is indexed by the unordered set of values in the key.
        affixes = [convert_ascii_reverse(a) for a in ca_table[frozenset(key)]]
        affix_lines = [' - {}'.format(a) for a in affixes]
        # One entry: "v1/v2/...:" followed by one list item per affix.
        line = '{}:\n{}\n'.format(
            '/'.join(key),
            '\n'.join(affix_lines),
        )
        f.write(line)
add_definitions(text) return sorted(definitions) if __name__ == '__main__': filename = os.path.abspath( os.path.join(os.path.basename(__file__), '../source_material/lexicon.htm')) html = open(filename).read() definitions = extract_definitions(html) check_correctness = False if check_correctness: roots = extract_roots(html) print('found {} roots'.format(len(roots))) print('found {} definitions'.format(len(definitions))) defined_roots = set(r for (r, d) in definitions) assert len(defined_roots) == len(definitions) undefined_roots = set(r for r in roots if r not in defined_roots) print('defined roots', len(defined_roots)) print('undefined roots', len(undefined_roots)) for r in sorted(undefined_roots): print(r) print(set(defined_roots) - set(roots)) for root, definition in definitions: root = convert_ascii_reverse(root) print('{}|{}'.format(root, definition))