Python LexicalEntry 예제들, pylmflib.core.lexical_entry.LexicalEntry Python 예제들

예제 #1

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_add_lexical_entry(self):
     # Check that add_lexical_entry() hands back the lexicon and that the
     # entries end up in the lexicon's list in the order they were added.
     lexicon = self.lexicon
     first, second = LexicalEntry(), LexicalEntry()
     # First addition
     returned = lexicon.add_lexical_entry(first)
     self.assertEqual(returned, lexicon)
     self.assertListEqual(lexicon.lexical_entry, [first])
     # Second addition
     returned = lexicon.add_lexical_entry(second)
     self.assertEqual(returned, lexicon)
     self.assertListEqual(lexicon.lexical_entry, [first, second])
     # Empty the lexicon's list in place, then drop the local references
     del lexicon.lexical_entry[:]
     del first, second

예제 #2

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_get_lexical_entries(self):
     # Check that get_lexical_entries() reports the content of the lexicon's
     # entry list, including an entry that is listed twice.
     lexicon = self.lexicon
     first, second = LexicalEntry(), LexicalEntry()
     # Fill the list directly instead of going through add_lexical_entry()
     lexicon.lexical_entry = [first, second]
     self.assertListEqual(lexicon.get_lexical_entries(), [first, second])
     # Append a second reference to an entry that is already present
     lexicon.lexical_entry.append(first)
     with_duplicate = [first, second, first]
     self.assertListEqual(lexicon.get_lexical_entries(), with_duplicate)
     # Empty the lexicon's list in place, then drop the local references
     del lexicon.lexical_entry[:]
     del first, second

예제 #3

0

파일 보기

파일: test_morphology_component.py 프로젝트: yuhsianglin/HimalCo

 def test_set_get_lexical_entry(self):
     # Round-trip a lexical entry through the component's setter and getter.
     component = self.component
     target = LexicalEntry()
     # The setter must hand back the component it was called on
     returned = component.set_lexical_entry(target)
     self.assertEqual(returned, component)
     # The getter must give back the entry that was stored
     self.assertEqual(component.get_lexical_entry(), target)
     # A change made through the local reference must be visible through
     # the getter as well
     target.lexeme = "toto"
     self.assertEqual(component.get_lexical_entry().lexeme, "toto")
     # Drop the local reference
     del target

예제 #4

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_remove_lexical_entry(self):
     # Remove entries one at a time; each call must hand back the lexicon
     # and leave only the remaining entries in the list.
     lexicon = self.lexicon
     first, second = LexicalEntry(), LexicalEntry()
     lexicon.lexical_entry = [first, second]
     # (entry to remove, list content expected afterwards)
     steps = ((first, [second]), (second, []))
     for victim, remaining in steps:
         self.assertEqual(lexicon.remove_lexical_entry(victim), lexicon)
         self.assertListEqual(lexicon.lexical_entry, remaining)
     # Drop the local references
     del first, second

예제 #5

0

파일 보기

파일: test_input_xml_lmf.py 프로젝트: yuhsianglin/HimalCo

 def test_get_sub_elements(self):
     # Build a <LexicalEntry> element holding a <Lemma> child (with the
     # lexeme feature) and two direct <feat> children, then check that
     # get_sub_elements() transfers the values to the instance.
     instance = LexicalEntry()
     element = Element("LexicalEntry")
     lemma = SubElement(element, "Lemma")
     SubElement(lemma, "feat", att="lexeme", val="hello")
     for att, val in (("partOfSpeech", "toto"), ("status", "draft")):
         SubElement(element, "feat", att=att, val=val)
     # Run the function under test and read the values back
     get_sub_elements(instance, element)
     self.assertEqual(instance.get_lexeme(), "hello")
     self.assertEqual(instance.get_partOfSpeech(), "toto")
     self.assertEqual(instance.get_status(), "draft")
     del instance, element, lemma

예제 #6

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_count_lexical_entries(self):
     # Check count_lexical_entries() while the list grows from 1 to 3 items.
     lexicon = self.lexicon
     first, second = LexicalEntry(), LexicalEntry()
     # Start with a single entry
     lexicon.lexical_entry = [first]
     self.assertEqual(lexicon.count_lexical_entries(), 1)
     # Append one item at a time; the last one is a repeated entry, which
     # is expected to be counted again
     for expected_count, extra in enumerate((second, first), 2):
         lexicon.lexical_entry.append(extra)
         self.assertEqual(lexicon.count_lexical_entries(), expected_count)
     # Empty the lexicon's list in place, then drop the local references
     del lexicon.lexical_entry[:]
     del first, second

예제 #7

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_audio(self):
     # Check the LaTeX media snippet produced by format_audio() for an
     # entry that has an audio file attached.
     entry = LexicalEntry()
     entry.set_audio(file_name="./test/input.wav")
     # The input is a .wav path while the expected snippet refers to the
     # matching .mp3 path.
     # Backslashes that start a LaTeX command are doubled so the literal
     # contains no invalid escape sequence; "\t" and "\n" are real tab and
     # newline characters of the expected output.
     expected = (
         "\\includemedia[\n"
         "\taddresource=./test/input.mp3,\n"
         "\tflashvars={\n"
         "\t\tsource=./test/input.mp3\n"
         "\t\t&autoPlay=true\n"
         "\t\t&autoRewind=true\n"
         "\t\t&loop=false\n"
         "\t\t&hideBar=true\n"
         "\t\t&volume=1.0\n"
         "\t\t&balance=0.0\n"
         "}]{\\includegraphics[scale=0.5]{sound.jpg}}{APlayer.swf} \\hspace{0.1cm}\n"
     )
     self.assertEqual(format_audio(entry, font), expected)
     del entry

예제 #8

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_related_forms(self):
     # Check the LaTeX rendering of an entry with synonym, antonym and
     # "see" related forms plus a morphology value.
     entry = LexicalEntry()
     entry.create_and_add_related_form("syn", mdf_semanticRelation["sy"])
     entry.create_and_add_related_form("ant", mdf_semanticRelation["an"])
     entry.set_morphology("morph")
     entry.create_and_add_related_form("see", mdf_semanticRelation["cf"])
     # Every LaTeX backslash is doubled so the literal contains no invalid
     # escape sequence; the resulting text is unchanged.
     expected = (
         "\\textit{Syn:} \\textbf{\\ipa{syn}}. "
         "\\textit{Ant:} \\textbf{\\ipa{ant}}. "
         "\\textit{Morph:} \\textbf{\\ipa{morph}}. "
         "\\textit{See:} \\textbf{\\ipa{see}} "
     )
     self.assertEqual(format_related_forms(entry, font), expected)
     del entry

예제 #9

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_find_lexical_entries(self):
     # Exercise find_lexical_entries() with a lambda and with a named
     # predicate.
     lexicon = self.lexicon
     # Four entries whose lexemes differ by case and punctuation
     capital_hello, world_bang, lower_hello, plain_world = [
         LexicalEntry().set_lexeme(text)
         for text in ("Hello", "world!", "hello", "world")
     ]
     lexicon.lexical_entry = [
         capital_hello, world_bang, lower_hello, plain_world
     ]
     # Case-sensitive comparison: a single entry qualifies
     exact_matches = lexicon.find_lexical_entries(
         lambda entry: entry.get_lexeme() == "Hello")
     self.assertListEqual(exact_matches, [capital_hello])

     def matches_hello_ignoring_case(entry):
         return entry.get_lexeme().lower() == "hello"

     # List is randomly ordered => compare as sets to avoid random results
     loose_matches = lexicon.find_lexical_entries(matches_hello_ignoring_case)
     self.assertEqual(set(loose_matches), {capital_hello, lower_hello})
     # Empty the lexicon's list in place, then drop the local references
     del lexicon.lexical_entry[:]
     del capital_hello, world_bang, lower_hello, plain_world

예제 #10

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_variant_forms(self):
     # Check the LaTeX rendering of a variant form with comments in four
     # languages.
     entry = LexicalEntry()
     entry.set_variant_form("var_ver")
     entry.set_variant_comment("com_ver", language="ver")
     entry.set_variant_comment("com_eng", language="eng")
     entry.set_variant_comment("com_nat", language="nat")
     entry.set_variant_comment("com_reg", language="reg")
     # Note that the "ver" comment does not appear in the expected text.
     # Every LaTeX backslash is doubled so the literal contains no invalid
     # escape sequence; the resulting text is unchanged.
     expected = (
         "\\textit{Variant:} \\textbf{\\ipa{var_ver}} "
         "(com_eng) (\\textit{\\zh{com_nat}}) (\\ipa{com_reg}) "
     )
     self.assertEqual(format_variant_forms(entry, font), expected)
     del entry

예제 #11

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_usage_notes(self):
     # Check the LaTeX rendering of usage notes given in four languages.
     entry = LexicalEntry()
     entry.set_usage_note("use_ver", language="ver")
     entry.set_usage_note("use_eng", language="eng")
     entry.set_usage_note("use_nat", language="nat")
     entry.set_usage_note("use_reg", language="reg")
     # Every LaTeX backslash is doubled so the literal contains no invalid
     # escape sequence; the resulting text is unchanged.
     expected = (
         "\\textit{VerUsage:} \\textbf{\\ipa{use_ver}} "
         "\\textit{Usage:} use_eng "
         "\\textit{\\textit{\\zh{use_nat}}} "
         "\\textit{[\\ipa{use_reg}]} "
     )
     # The formatter is applied to the entry's first sense
     self.assertEqual(format_usage_notes(entry.get_senses()[0], font),
                      expected)
     del entry

예제 #12

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_restrictions(self):
     # Check the LaTeX rendering of restrictions given in four languages.
     entry = LexicalEntry()
     entry.set_restriction("strict_ver", language="ver")
     entry.set_restriction("strict_eng", language="eng")
     entry.set_restriction("strict_nat", language="nat")
     entry.set_restriction("strict_reg", language="reg")
     # Every LaTeX backslash is doubled so the literal contains no invalid
     # escape sequence; the resulting text is unchanged.
     expected = (
         "\\textit{VerRestrict:} \\textbf{\\ipa{strict_ver}} "
         "\\textit{Restrict:} strict_eng "
         "\\textit{\\textit{\\zh{strict_nat}}} "
         "\\textit{[\\ipa{strict_reg}]} "
     )
     # The formatter is applied to the entry's first sense
     self.assertEqual(format_restrictions(entry.get_senses()[0], font),
                      expected)
     del entry

예제 #13

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_examples(self):
     # Check the LaTeX rendering of examples given in four languages.
     entry = LexicalEntry()
     entry.add_example("ex_ver", language="ver")
     entry.add_example("ex_eng", language="eng")
     entry.add_example("ex_nat", language="nat")
     entry.add_example("ex_reg", language="reg")
     # One literal per expected output line. Every LaTeX backslash is
     # doubled so the literal contains no invalid escape sequence; "\n" is
     # a real newline of the expected output.
     expected = (
         "\\begin{exe}\n"
         "\\sn \\textbf{\\ipa{ex_ver}}\n"
         "\\trans ex_eng\n"
         "\\trans \\textit{\\textit{\\zh{ex_nat}}}\n"
         "\\trans \\textit{[\\ipa{ex_reg}]}\n"
         "\\end{exe}\n"
     )
     # The formatter is applied to the entry's first sense
     self.assertEqual(format_examples(entry.get_senses()[0], font),
                      expected)
     del entry

예제 #14

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_etymology(self):
     # Check the LaTeX rendering of an etymology with its gloss; the gloss
     # is expected between the characters U+2018 and U+2019.
     entry = LexicalEntry()
     entry.set_etymology("etym")
     entry.set_etymology_gloss("ETYM")
     formatted = format_etymology(entry, font)
     self.assertEqual(
         formatted,
         u"\\textit{Etym:} \\textbf{etym} \u2018ETYM\u2019. ")
     del entry

예제 #15

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_borrowed_word(self):
     # Check the LaTeX rendering of a borrowed word: source language
     # followed by the written form.
     entry = LexicalEntry()
     entry.set_borrowed_word("English")
     entry.set_written_form("wave")
     formatted = format_borrowed_word(entry, font)
     self.assertEqual(formatted, "\\textit{From:} English wave. ")
     del entry

예제 #16

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_part_of_speech(self):
     # Check that the part of speech "verb" is rendered as the italic
     # abbreviation "v".
     entry = LexicalEntry()
     entry.set_lexeme("action")
     entry.set_partOfSpeech("verb")
     formatted = format_part_of_speech(entry, font)
     self.assertEqual(formatted, "\\textit{v}. ")
     del entry

예제 #17

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_sort_lexical_entries(self):
     # Check sort_lexical_entries() with its default ordering and with a
     # caller-supplied character ordering. In both cases the sorted list
     # must be returned and stored in the lexicon.
     # Create several lexical entries with different lexemes
     entry1 = LexicalEntry().set_lexeme("aa")
     entry2 = LexicalEntry().set_lexeme("ab")
     entry3 = LexicalEntry().set_lexeme("ba")
     entry4 = LexicalEntry().set_lexeme("bb")
     # Add entries to the lexicon in unsorted order
     self.lexicon.lexical_entry = [entry4, entry1, entry2, entry3]
     # Test sort lexical entries
     default_sorted = [entry1, entry2, entry3, entry4]
     self.assertListEqual(self.lexicon.sort_lexical_entries(), default_sorted)
     self.assertListEqual(self.lexicon.lexical_entry, default_sorted)
     # Provide a sort order: a lower weight sorts first, so each upper-case
     # letter comes just before its lower-case counterpart
     my_order = {'A': 1.1, 'a': 1.2, 'B': 2.1, 'b': 2.2}
     entry5 = LexicalEntry().set_lexeme("Aa")
     entry6 = LexicalEntry().set_lexeme("bB")
     self.lexicon.lexical_entry.append(entry5)
     self.lexicon.lexical_entry.append(entry6)
     custom_sorted = [entry5, entry1, entry2, entry3, entry6, entry4]
     self.assertListEqual(
         self.lexicon.sort_lexical_entries(sort_order=my_order),
         custom_sorted)
     self.assertListEqual(self.lexicon.lexical_entry, custom_sorted)
     # Release LexicalEntry instances
     del self.lexicon.lexical_entry[:]
     del entry1, entry2, entry3, entry4, entry5, entry6

예제 #18

0

파일 보기

 def test_xml_lmf_write(self):
     # Write a single lexical entry as an XML LMF file and compare the file
     # content line by line with the expected document.
     import os
     import sys
     # Create LMF objects
     lexical_entry = LexicalEntry()
     lexical_entry.lemma = Lemma()
     lexical_entry.partOfSpeech = "toto"
     lexical_entry.status = "draft"
     lexical_entry.lemma.lexeme = "hello"
     # The output file is created in the directory given by sys.path[0]
     utest_path = sys.path[0] + '/'
     xml_lmf_filename = utest_path + "lmf_output.xml"
     expected_lines = [
         '<?xml version="1.0" encoding="utf-8"?>' + EOL,
         '<LexicalEntry id="0">' + EOL,
         '    <feat att="status" val="draft"/>' + EOL,
         '    <Lemma>' + EOL,
         '        <feat att="lexeme" val="hello"/>' + EOL,
         '    </Lemma>' + EOL,
         '    <feat att="partOfSpeech" val="toto"/>' + EOL,
         '</LexicalEntry>' + EOL
     ]
     try:
         # Write XML LMF file and test result
         xml_lmf_write(lexical_entry, xml_lmf_filename)
         # The context manager closes the file even if reading fails
         with open(xml_lmf_filename, "r") as xml_lmf_file:
             written_lines = xml_lmf_file.readlines()
         self.assertListEqual(expected_lines, written_lines)
     finally:
         # Remove XML LMF file, also when the test fails, so that no
         # artifact is left behind
         if os.path.exists(xml_lmf_filename):
             os.remove(xml_lmf_filename)
     # Release LMF objects
     del lexical_entry.lemma
     lexical_entry.lemma = None
     del lexical_entry

예제 #19

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_encyclopedic_informations(self):
     # Check the LaTeX rendering of encyclopedic information given in four
     # languages.
     entry = LexicalEntry()
     entry.set_encyclopedic_information("info_ver", language="ver")
     entry.set_encyclopedic_information("info_eng", language="eng")
     entry.set_encyclopedic_information("info_nat", language="nat")
     entry.set_encyclopedic_information("info_reg", language="reg")
     # Every LaTeX backslash is doubled so the literal contains no invalid
     # escape sequence; the resulting text is unchanged.
     expected = (
         "\\textbf{\\ipa{info_ver}} info_eng "
         "\\textit{\\zh{info_nat}} \\textit{[\\ipa{info_reg}]} "
     )
     # The formatter is applied to the entry's first sense
     self.assertEqual(
         format_encyclopedic_informations(entry.get_senses()[0], font),
         expected)
     del entry

예제 #20

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_semantic_domains(self):
     # Check the LaTeX rendering of two semantic domains: one "SD:" item is
     # expected per domain, in the order the domains were set.
     domains = ("semantic", "domain")
     entry = LexicalEntry()
     for domain in domains:
         entry.set_semantic_domain(domain)
     expected = "".join("\\textit{SD:} %s. " % domain for domain in domains)
     self.assertEqual(format_semantic_domains(entry, font), expected)
     del entry

예제 #21

0

파일 보기

파일: test_output_mdf.py 프로젝트: yuhsianglin/HimalCo

 def test_mdf_write(self):
     # Write a one-entry lexicon as an MDF file, first with the default
     # mapping and then with a customized mapping and marker order, and
     # compare the file content line by line each time.
     import os
     import sys
     # Create LMF objects
     lexical_entry = LexicalEntry()
     lexical_entry.lemma = Lemma()
     lexical_entry.partOfSpeech = "toto"
     lexical_entry.status = "draft"
     lexical_entry.lemma.lexeme = "hello"
     lexicon = Lexicon()
     lexicon.add_lexical_entry(lexical_entry)
     # The output file is created in the directory given by sys.path[0]
     utest_path = sys.path[0] + '/'
     mdf_filename = utest_path + "output.txt"
     try:
         # Write MDF file and test result
         mdf_write(lexicon, mdf_filename)
         # The context manager closes the file even if reading fails
         with open(mdf_filename, "r") as mdf_file:
             written_lines = mdf_file.readlines()
         expected_lines = [
             "\\lx hello" + EOL, "\\ps toto" + EOL, "\\st draft" + EOL, EOL
         ]
         self.assertListEqual(expected_lines, written_lines)
         # Customize mapping: 'lx' and 'st' deliberately exchange their
         # values so that the effect of the mapping shows in the output
         lmf2mdf = {
             "lx": lambda lexical_entry: lexical_entry.get_status(),
             "ps": lambda lexical_entry: lexical_entry.get_partOfSpeech(),
             "st": lambda lexical_entry: lexical_entry.get_lexeme()
         }
         order = ["st", "lx", "ps"]
         # Write MDF file and test result
         mdf_write(lexicon, mdf_filename, lmf2mdf, order)
         with open(mdf_filename, "r") as mdf_file:
             written_lines = mdf_file.readlines()
         expected_lines = [
             "\\st hello" + EOL, "\\lx draft" + EOL, "\\ps toto" + EOL, EOL
         ]
         self.assertListEqual(expected_lines, written_lines)
     finally:
         # Remove MDF file, also when the test fails, so that no artifact
         # is left behind
         if os.path.exists(mdf_filename):
             os.remove(mdf_filename)
     # Release LMF objects
     del lexical_entry.lemma
     lexical_entry.lemma = None
     del lexical_entry, lexicon

예제 #22

0

파일 보기

 def test_add_link(self):
     # Check that add_link() hands back the related form together with an
     # element that contains an <a> child pointing at the target entry.
     from pylmflib.morphology.related_form import RelatedForm
     source_element = Element("RelatedForm", targets="lx")
     related = RelatedForm()
     related.set_lexical_entry(LexicalEntry(id="lx_id"))
     # Build the element expected as output: same tag and attribute, plus
     # an <a href="lx_id1"> child whose text is "lx"
     expected_element = Element("RelatedForm", targets="lx")
     anchor = SubElement(expected_element, "a")
     anchor.attrib["href"] = "lx_id1"
     anchor.text = "lx"
     # Run the function under test; elements are compared through their
     # serialized form
     result = add_link(related, source_element)
     self.assertEqual(result[0], related)
     self.assertEqual(tostring(result[1]), tostring(expected_element))

예제 #23

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_format_link(self):
     # Check the LaTeX hyperlink produced for an entry, without and with an
     # explicit homonym number.
     entry = LexicalEntry("link_0")
     entry.set_lexeme("link")
     # Without a homonym number the link target ends in "1".
     # Every LaTeX backslash is doubled so the literals contain no invalid
     # escape sequence; the resulting text is unchanged.
     expected = "\\hyperlink{link_01}{\\textbf{\\ipa{link}}}"
     self.assertEqual(format_link(entry, font), expected)
     # With homonym number 2 the target ends in "2" and a subscript is added
     entry.set_homonymNumber(2)
     expected = "\\hyperlink{link_02}{\\textbf{\\ipa{link}} \\textsubscript{2}}"
     self.assertEqual(format_link(entry, font), expected)
     del entry

예제 #24

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_check_cross_references(self):
     # Check that check_cross_references() links each related form to the
     # lexical entry it targets, including the warning cases and targets
     # that carry a homonym number.
     lexicon = self.lexicon
     # Two entries that refer to each other
     hello = LexicalEntry().set_lexeme("Hello")
     hello = hello.create_and_add_related_form("world!", "main entry")
     world_bang = LexicalEntry().set_lexeme("world!")
     world_bang = world_bang.create_and_add_related_form("Hello", "subentry")
     lexicon.lexical_entry = [hello, world_bang]
     # Nominal case: both related forms get resolved
     self.assertIs(lexicon.check_cross_references(), lexicon)
     self.assertIs(hello.related_form[0].get_lexical_entry(), world_bang)
     self.assertIs(world_bang.related_form[0].get_lexical_entry(), hello)
     # Warning case: the target "world" does not exist yet
     lower_hello = LexicalEntry().set_lexeme("hello")
     lower_hello = lower_hello.create_and_add_related_form(
         "world", "main entry")
     lexicon.lexical_entry.append(lower_hello)
     lexicon.reset_check()
     lexicon.check_cross_references()
     # Back to the nominal case once the target entry is present
     first_world = LexicalEntry().set_lexeme("world")
     lexicon.lexical_entry.append(first_world)
     lexicon.reset_check()
     self.assertIs(lexicon.check_cross_references(), lexicon)
     self.assertIs(lower_hello.related_form[0].get_lexical_entry(),
                   first_world)
     # Warning case: two entries now share the lexeme "world"
     second_world = LexicalEntry().set_lexeme("world")
     lexicon.lexical_entry.append(second_world)
     lexicon.reset_check()
     lexicon.check_cross_references()
     # With homonym numbers set, the target "world2" must resolve to the
     # entry numbered "2"
     lower_hello.related_form[0].set_lexical_entry(None)
     lower_hello.related_form[0].targets = "world2"
     first_world.homonymNumber = "1"
     second_world.homonymNumber = "2"
     lexicon.reset_check()
     self.assertIs(lexicon.check_cross_references(), lexicon)
     self.assertIs(lower_hello.related_form[0].get_lexical_entry(),
                   second_world)
     # Empty the lexicon's list in place, then drop the local references
     del lexicon.lexical_entry[:]
     del hello, world_bang, lower_hello, first_world, second_world

예제 #25

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

 def test_insert_references(self):
     # Check the reference lines generated for three kinds of verb; only
     # the abbreviation inside the labels changes between the cases.
     lexical_entry = LexicalEntry()
     lexical_entry.set_spelling_variant("tata")
     # (part of speech, abbreviation expected in the labels)
     cases = (
         ("transitive verb", "vt"),
         ("intransitive verb", "vi"),
         ("reflexive verb", "vr"),
     )
     for part_of_speech, abbreviation in cases:
         lexical_entry.partOfSpeech = part_of_speech
         label = "tata." + abbreviation
         expected_lines = "\\hfill\\break See: \\ref{" + label + "}" + EOL
         expected_lines += "\\ref{" + label + ".eng}" + EOL
         self.assertEqual(expected_lines, insert_references(lexical_entry))

예제 #26

0

파일 보기

파일: test_core_lexicon.py 프로젝트: yuhsianglin/HimalCo

 def test_sort_homonym_numbers(self):
     # Check that sort_homonym_numbers() reorders entries sharing a lexeme
     # by homonym number, and that the result is both returned and stored.
     lexicon = self.lexicon
     # Homonym pairs are created in descending number order on purpose
     aa_second = LexicalEntry().set_lexeme("aa").set_homonymNumber("2")
     aa_first = LexicalEntry().set_lexeme("aa").set_homonymNumber("1")
     ab = LexicalEntry().set_lexeme("ab")
     ba = LexicalEntry().set_lexeme("ba")
     bb_sixth = LexicalEntry().set_lexeme("bb").set_homonymNumber("6")
     bb_fifth = LexicalEntry().set_lexeme("bb").set_homonymNumber("5")
     lexicon.lexical_entry = [aa_second, aa_first, ab, ba, bb_sixth, bb_fifth]
     # Within each pair the lower homonym number must come first
     expected_order = [aa_first, aa_second, ab, ba, bb_fifth, bb_sixth]
     self.assertListEqual(lexicon.sort_homonym_numbers(), expected_order)
     self.assertListEqual(lexicon.lexical_entry, expected_order)
     # Empty the lexicon's list in place, then drop the local references
     del lexicon.lexical_entry[:]
     del aa_second, aa_first, ab, ba, bb_sixth, bb_fifth

예제 #27

0

파일 보기

 def test_build_sub_elements(self):
     # Check that build_sub_elements() fills an empty XML element with a
     # <Lemma> child and two direct <feat> children taken from the entry.
     instance = LexicalEntry()
     instance.lemma = Lemma()
     instance.partOfSpeech = "toto"
     instance.status = "draft"
     instance.lemma.lexeme = "hello"
     element = Element("LexicalEntry")
     # Run the function under test
     build_sub_elements(instance, element)
     # The lexeme is expected as a <feat> inside <Lemma>
     lexeme_feat = element.find("Lemma").find("feat")
     self.assertEqual(lexeme_feat.attrib["att"], "lexeme")
     self.assertEqual(lexeme_feat.attrib["val"], "hello")
     # Exactly two direct <feat> children are expected: status first, then
     # part of speech
     status_feat, pos_feat = element.findall("feat")
     self.assertEqual(pos_feat.attrib["att"], "partOfSpeech")
     self.assertEqual(pos_feat.attrib["val"], "toto")
     self.assertEqual(status_feat.attrib["att"], "status")
     self.assertEqual(status_feat.attrib["val"], "draft")
     # Release LMF objects and the XML element
     del instance.lemma
     instance.lemma = None
     del instance, element

예제 #28

0

파일 보기

파일: test_output_doc.py 프로젝트: yuhsianglin/HimalCo

    def test_doc_write(self):
        # Write a lexical resource as a document file, first with the
        # default mapping and then with a customized one. The content is
        # not asserted: the test only checks that the file gets written
        # and can be read back.
        import os
        import sys
        # Create LMF objects
        lexical_entry = LexicalEntry()
        lexical_entry.lemma = Lemma()
        lexical_entry.partOfSpeech = "toto"
        lexical_entry.status = "draft"
        lexical_entry.lemma.lexeme = "hello"
        lexicon = Lexicon()
        lexicon.add_lexical_entry(lexical_entry)
        lexical_resource = LexicalResource()
        lexical_resource.add_lexicon(lexicon)
        # The output file is created in the directory given by sys.path[0]
        utest_path = sys.path[0] + '/'
        doc_filename = utest_path + "output.docx"
        try:
            # Write document file and test result
            doc_write(lexical_resource, doc_filename)
            # A .docx file is binary, hence the "rb" mode; the context
            # manager closes the file even if reading fails
            with open(doc_filename, "rb") as doc_file:
                doc_file.readlines()

            # Customize mapping
            def lmf2doc(lexicon, document, items, sort_order, paradigms,
                        reverse):
                return "test"

            # Write document file and test result
            doc_write(lexical_resource, doc_filename, None, lmf2doc)
            with open(doc_filename, "rb") as doc_file:
                doc_file.readlines()
        finally:
            # Remove document file, also when the test fails, so that no
            # artifact is left behind
            if os.path.exists(doc_filename):
                os.remove(doc_filename)
        # Release LMF objects
        del lexical_entry.lemma
        lexical_entry.lemma = None
        del lexical_entry, lexicon
        lexicon = None
        del lexical_resource

예제 #29

0

파일 보기

파일: mdf.py 프로젝트: yuhsianglin/HimalCo

def mdf_read(filename=None,
             mdf2lmf=mdf_lmf,
             lexicon=None,
             id=None,
             encoding=ENCODING):
    """! @brief Read an MDF file.
    @param filename The name of the MDF file to read with full path, for instance 'user/input.txt'. If None, the entry source recorded in 'lexicon' is used instead.
    @param mdf2lmf A Python dictionary describing the mapping between MDF markers and LMF representation. Default value is 'mdf_lmf' dictionary defined in 'pylmflib/config/mdf.py'. Please refer to it as an example.
    @param lexicon An existing Lexicon to fill with lexical entries to read. If None, a new Lexicon is created from 'id'.
    @param id A Python string identifying the lexicon to create.
    @param encoding Encoding passed to 'open_read'; defaults to the ENCODING constant. Otherwise, user has to specify the native encoding of the document.
    @return A Lexicon instance containing all lexical entries.
    """
    import re
    # If not provided, create a Lexicon instance to contain all lexical entries
    if lexicon is None:
        lexicon = Lexicon(id)
    if filename is None:
        # No file name given: use the source recorded in the lexicon
        filename = lexicon.get_entrySource()
    else:
        # Set lexicon attribute
        lexicon.set_entrySource(filename)
    # MDF syntax is the following: '\marker value', optionally with
    # attributes between angle brackets: '\marker <attributes> value'.
    # Group 1 is the marker, group 3 the attributes, group 4 the value.
    mdf_pattern = re.compile(r"^\\(\w*) (<(.*)>)? ?(.*)$")
    # Prefix announcing a component of a multiword expression in 'lf' fields
    component_prefix = "Component"
    # Add each lexical entry to the lexicon
    current_entry = None
    sub_entry = None
    component = None
    main_entry = None
    # Read in unicode
    mdf_file = open_read(filename, encoding=encoding)
    try:
        for line in mdf_file.readlines():
            # Do not parse empty lines
            if line == EOL:
                continue
            result = mdf_pattern.match(line)
            if result is None:
                # Line does not follow MDF syntax => continue parsing next line
                continue
            marker = result.group(1)
            attrs = result.group(3)
            value = result.group(4)
            # Do not consider markers starting with an underscore character (e.g. '_sh' and '_DateStampHasFourDigitYear');
            # 'startswith' also copes with an empty marker
            if marker.startswith('_'):
                continue
            # Remove trailing spaces and end-of-line characters
            value = value.rstrip(' \r\n')
            # Do not consider empty fields
            if value == "":
                continue
            # Check if the current entry is a multiword expression
            is_mwe = False
            if marker == "lf":
                lf = value.split(" = ")
                if lf[0].startswith(component_prefix):
                    # Drop the prefix itself; 'lstrip' would strip a set of
                    # characters rather than the prefix
                    component_nb = lf[0][len(component_prefix):]
                    value = lf[1]
                    is_mwe = True
            # 'lx' and 'se' markers indicate a new entry
            if marker == "lx" or marker == "se" or is_mwe:
                # Compute a unique identifier
                uid = uni2sampa(value)
                if marker == "se":
                    # Create a subentry
                    sub_entry = LexicalEntry(uid)
                    # An MDF subentry corresponds to an LMF lexical entry
                    mdf2lmf["lx"](value, sub_entry)
                    # Add it to the lexicon
                    lexicon.add_lexical_entry(sub_entry)
                    # Manage main entry
                    if main_entry is None:
                        main_entry = current_entry
                    else:
                        current_entry = main_entry
                    # Set main entry
                    homonym_nb = current_entry.get_homonymNumber()
                    if homonym_nb is None:
                        homonym_nb = ""
                    sub_entry.create_and_add_related_form(
                        current_entry.get_lexeme() + homonym_nb, "main entry")
                elif is_mwe:
                    # Create a component entry
                    component = LexicalEntry(uid)
                    # An MDF component corresponds to an LMF lexical entry
                    mdf2lmf["lx"](value, component)
                    # Add it to the lexicon
                    lexicon.add_lexical_entry(component)
                    # Manage current entry
                    if sub_entry is not None:
                        current_entry = sub_entry
                    # Set component
                    homonym_nb = current_entry.get_homonymNumber()
                    if homonym_nb is None:
                        homonym_nb = ""
                    current_entry.create_and_add_component(component_nb, value)
                    component.create_and_add_related_form(
                        current_entry.get_lexeme() + homonym_nb,
                        "complex predicate")
                    component.set_independentWord(False)
                else:
                    # Create a new entry
                    current_entry = LexicalEntry(uid)
                    # Add it to the lexicon
                    lexicon.add_lexical_entry(current_entry)
                    # Reset main entry
                    main_entry = None
            # Map MDF marker and value to LMF representation
            try:
                if attrs is not None:
                    # There are attributes
                    attributes = {}
                    # Remove quotation marks from attributes if any
                    attrs = attrs.replace('"', '')
                    for attr in attrs.split(' '):
                        attributes.update(
                            {attr.split('=')[0]: attr.split('=')[1]})
                    # A customized marker starts with '__' characters
                    mdf2lmf["__" + marker](attributes, value, current_entry)
                else:
                    mdf2lmf[marker](value, current_entry)
                # A subentry or component created above becomes the current
                # entry only after its own marker has been mapped
                if sub_entry is not None:
                    current_entry = sub_entry
                    sub_entry = None
                if component is not None:
                    sub_entry = current_entry
                    current_entry = component
                    component = None
            except KeyError:
                # When printing, we need to convert 'unicode' into 'str' using 'utf-8' encoding:
                print(Warning(
                    "MDF marker '%s' encountered for lexeme '%s' is not defined in configuration"
                    % (marker.encode(ENCODING),
                       current_entry.get_lexeme().encode(ENCODING))))
            except Error as exception:
                exception.handle()
    finally:
        # Close the file even if parsing raises
        mdf_file.close()
    return lexicon

예제 #30

0

파일 보기

파일: test_output_tex.py 프로젝트: yuhsianglin/HimalCo

    def test_tex_write(self):
        # Write a lexical resource as a LaTeX file, first with the default
        # mapping and then with a customized one, and compare the file
        # content line by line each time.
        # Every LaTeX backslash in the expected strings is doubled so that
        # no literal contains an invalid escape sequence.
        import os
        import sys
        # Create LMF objects
        lexical_entry = LexicalEntry()
        lexical_entry.lemma = Lemma()
        lexical_entry.partOfSpeech = "toto"
        lexical_entry.status = "draft"
        lexical_entry.lemma.lexeme = "hello"
        lexicon = Lexicon()
        lexicon.add_lexical_entry(lexical_entry)
        lexical_resource = LexicalResource()
        lexical_resource.add_lexicon(lexicon)
        # The output file is created in the directory given by sys.path[0]
        utest_path = sys.path[0] + '/'
        tex_filename = utest_path + "output.tex"
        try:
            # Write LaTeX file and test result
            tex_write(lexical_resource, tex_filename)
            # The context manager closes the file even if reading fails
            with open(tex_filename, "r") as tex_file:
                written_lines = tex_file.readlines()
            # Lines expected before the entries
            begin_lines = [
                EOL,
                "\\begin{document}" + EOL,
                "\\maketitle" + EOL,
                "\\newpage" + EOL,
                EOL,
                "\\def\\mytextsc{\\bgroup\\obeyspaces\\mytextscaux}" + EOL,
                "\\def\\mytextscaux#1{\\mytextscauxii #1\\relax\\relax\\egroup}" + EOL,
                "\\def\\mytextscauxii#1{%" + EOL,
                "\\ifx\\relax#1\\else \\ifcat#1\\@sptoken{} \\expandafter\\expandafter\\expandafter\\mytextscauxii\\else" + EOL,
                "\\ifnum`#1=\\uccode`#1 {\\normalsize #1}\\else {\\footnotesize \\uppercase{#1}}\\fi \\expandafter\\expandafter\\expandafter\\mytextscauxii\\expandafter\\fi\\fi}" + EOL,
                EOL,
                "\\setlength\\parindent{0cm}" + EOL,
                EOL,
                "\\addmediapath{.}" + EOL,
                "\\addmediapath{./mp3}" + EOL,
                "\\addmediapath{./wav}" + EOL,
                "\\graphicspath{{" + os.path.abspath('.') + "/pylmflib/output/img/}}" + EOL,
                EOL,
                "\\newpage" + EOL,
                "\\begin{multicols}{2}" + EOL,
                EOL
            ]
            # Lines expected after the entries
            end_lines = ["\\end{multicols}" + EOL, "\\end{document}" + EOL]
            # Lines expected for the entry with the default mapping
            # (no "\pdfbookmark" line is expected after the section title)
            expected_lines = [
                "\\newpage" + EOL,
                "\\section*{\\centering- \\textbf{\\ipa{H}} \\textbf{\\ipa{h}} -}" + EOL,
                "\\paragraph{\\hspace{-0.5cm} \\textbf{\\ipa{hello}}} \\hypertarget{01}{}" + EOL,
                "\\markboth{\\textbf{\\ipa{hello}}}{}" + EOL,
                "\\textit{Status:} draft" + EOL,
                "\\lhead{\\firstmark}" + EOL,
                "\\rhead{\\botmark}" + EOL,
                EOL
            ]
            self.assertListEqual(begin_lines + expected_lines + end_lines,
                                 written_lines)
            # Customize mapping: one formatting function per attribute
            my_lmf_tex = {
                "Lemma.lexeme":
                lambda lexical_entry:
                "is " + lexical_entry.get_lexeme() + "." + EOL,
                "LexicalEntry.id":
                lambda lexical_entry:
                "The lexical entry " + str(lexical_entry.get_id()) + " ",
                "LexicalEntry.partOfSpeech":
                lambda lexical_entry:
                "Its grammatical category is " +
                lexical_entry.get_partOfSpeech() + "." + EOL,
                "LexicalEntry.status":
                lambda lexical_entry:
                "Warning: " + lexical_entry.get_status() + " version!" + EOL
            }
            # Order in which the attributes are written
            my_order = [
                "LexicalEntry.id", "Lemma.lexeme", "LexicalEntry.partOfSpeech",
                "LexicalEntry.status"
            ]

            def lmf2tex(entry, font):
                # Concatenate the formatted attributes in 'my_order' order
                return "".join(
                    my_lmf_tex[attribute](entry) for attribute in my_order)

            # Write LaTeX file and test result
            tex_write(lexical_resource, tex_filename, None, None, lmf2tex,
                      font)
            with open(tex_filename, "r") as tex_file:
                written_lines = tex_file.readlines()
            # Lines expected for the entry with the customized mapping
            expected_lines = [
                "\\newpage" + EOL,
                "\\section*{\\centering- \\textbf{\\ipa{H}} \\textbf{\\ipa{h}} -}" + EOL,
                "The lexical entry 01 is hello." + EOL,
                "Its grammatical category is toto." + EOL,
                "Warning: draft version!" + EOL,
                "\\lhead{\\firstmark}" + EOL,
                "\\rhead{\\botmark}" + EOL,
                EOL
            ]
            self.assertListEqual(begin_lines + expected_lines + end_lines,
                                 written_lines)
        finally:
            # Remove LaTeX file, also when the test fails, so that no
            # artifact is left behind
            if os.path.exists(tex_filename):
                os.remove(tex_filename)
        # Release LMF objects
        del lexical_entry.lemma
        lexical_entry.lemma = None
        del lexical_entry, lexicon
        lexicon = None
        del lexical_resource