def test_get_sub_elements(self):
    # Verify that get_sub_elements() makes the "feat" values of an XML tree
    # readable through the getters of the LexicalEntry instance it receives.
    entry = LexicalEntry()
    # Build <LexicalEntry> with a <Lemma> child carrying the lexeme...
    root = Element("LexicalEntry")
    lemma_node = SubElement(root, "Lemma")
    SubElement(lemma_node, "feat", att="lexeme", val="hello")
    # ...and two features attached directly to the entry element
    for feat_name, feat_value in (("partOfSpeech", "toto"), ("status", "draft")):
        SubElement(root, "feat", att=feat_name, val=feat_value)
    # Fill the instance from the XML element, then check every getter
    get_sub_elements(entry, root)
    self.assertEqual(entry.get_lexeme(), "hello")
    self.assertEqual(entry.get_partOfSpeech(), "toto")
    self.assertEqual(entry.get_status(), "draft")
    del entry, root, lemma_node
def test_add_link(self):
    # Verify add_link() on a "RelatedForm" element that carries a "targets"
    # attribute: the call is expected to return the form it was given together
    # with an element holding an <a> hyperlink child.
    from morphology.related_form import RelatedForm
    source = Element("RelatedForm", targets="lx")
    related = RelatedForm()
    related.set_lexical_entry(LexicalEntry(id="lx_id"))
    # Expected XML: same tag and attribute, plus <a href="lx_id1">lx</a>
    expected = Element("RelatedForm", targets="lx")
    anchor = SubElement(expected, "a")
    anchor.set("href", "lx_id1")
    anchor.text = "lx"
    outcome = add_link(related, source)
    # First item is compared to the form, second one to the expected XML
    self.assertEqual(outcome[0], related)
    self.assertEqual(tostring(outcome[1]), tostring(expected))
def build_sub_elements(object, element):
    """! @brief Create XML sub-elements to an existing XML element by parsing an LMF object instance.
    Attributes whose name starts with an underscore and attributes set to None are skipped.
    A list yields one sub-element per item and any other non-scalar value yields one sub-element, each named after the class of the value and filled recursively.
    The attributes "dtdVersion", "id" and "targets" are written as XML attributes of the element itself ("targets" additionally triggers add_link()).
    Every remaining value becomes a "feat" sub-element (att=name, val=value) which is then passed to the handle_*() formatting helpers.
    @param object An LMF object instance.
    @param element XML element for which sub-elements have to be created according to LMF object attributes.
    """
    # Parse instance attributes
    for attr_name, attr_value in object.__dict__.items():
        # Only public attributes that are actually set are exported
        if attr_name.startswith('_') or attr_value is None:
            continue
        # Handle boolean values: they are exported through their textual form
        # (exact type checks are kept throughout so that dispatching is unchanged)
        if type(attr_value) is bool:
            attr_value = unicode(attr_value)
        if type(attr_value) is list:
            # We suppose that a list always contains objects
            for member in attr_value:
                sub_element = SubElement(element, member.__class__.__name__)
                build_sub_elements(member, sub_element)
        elif type(attr_value) not in [int, str, unicode]:
            # The attribute is itself a class instance: create an XML element
            # and restart the same operation recursively on this object
            sub_element = SubElement(element, attr_value.__class__.__name__)
            build_sub_elements(attr_value, sub_element)
        else:
            # From here on the value ends up as an XML attribute value, which
            # has to be a string: convert integers in both remaining cases
            # (previously only done for the special attributes below)
            if type(attr_value) is int:
                attr_value = unicode(attr_value)
            if attr_name in ["dtdVersion", "id", "targets"]:
                # Special attributes must be inserted as XML element attributes
                element.attrib.update({attr_name: attr_value})
                if attr_name == "targets":
                    add_link(object, element)
            else:
                # In all other cases, an XML sub-element must be created with the keyword name "feat"
                feat = SubElement(element, "feat", att=attr_name, val=attr_value)
                # Handle reserved characters and fonts
                handle_reserved(feat)
                handle_fv(feat)
                handle_fn(feat)
                handle_font(feat)
                # Special formatting
                handle_pinyin(feat)
                handle_tones(feat)
                handle_caps(feat)
def test_handle_caps(self):
    # Verify handle_caps(): each chunk introduced by "°" in the "val"
    # attribute is expected inside its own <span class="sc"> element.
    raw = u"°trucs et°astuces"
    source = Element("name", val=raw)
    # Expected element: empty leading text, then one span per marked chunk
    expected = Element("name", val=raw)
    expected.text = ""
    for span_text, span_tail in (("trucs", " et"), ("astuces", "")):
        span = SubElement(expected, "span", {"class": "sc"})
        span.text = span_text
        span.tail = span_tail
    self.assertEqual(tostring(handle_caps(source)), tostring(expected))
def test_handle_pinyin(self):
    # Verify handle_pinyin(): each chunk introduced by "@" in the "val"
    # attribute is expected inside its own <span class="pinyin"> element.
    raw = unicode("@at1 atA@at2 atB")
    source = Element("name", val=raw)
    # Expected element: empty leading text, then one span per marked chunk,
    # the unmarked remainder being kept as the tail of the span
    expected = Element("name", val=raw)
    expected.text = ""
    for span_text, span_tail in (("at1", " atA"), ("at2", " atB")):
        span = SubElement(expected, "span", {"class": "pinyin"})
        span.text = span_text
        span.tail = span_tail
    self.assertEqual(tostring(handle_pinyin(source)), tostring(expected))
def test_prettify(self):
    # Verify that prettify() renders an element as an XML declaration
    # followed by one tag per line, every line ending with EOL.
    root = Element("LexicalEntry")
    SubElement(root, "Lemma")
    # Build expected result: the lines are listed separately, then glued
    # with the end-of-line marker (also appended after the last line)
    newline = unicode(EOL)
    expected_lines = [
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>",
        "<LexicalEntry>",
        " <Lemma/>",
        "</LexicalEntry>",
    ]
    expected_str = newline.join(expected_lines) + newline
    # Test
    self.assertEqual(prettify(root), expected_str)
    del root
def test_write_result(self):
    # Verify that write_result() produces a file at the requested location.
    # The file is removed in a "finally" clause so that a failing call does
    # not leave "output.xml" behind in the test directory.
    import os
    import sys
    utest_path = sys.path[0] + '/'
    xml_filename = utest_path + "output.xml"
    # Create XML element with sub-element
    element = Element("LexicalEntry")
    SubElement(element, "Lemma")
    try:
        # Write result and make sure the file has really been created
        write_result(element, xml_filename)
        self.assertTrue(os.path.isfile(xml_filename))
    finally:
        # Remove XML file (it may be missing if the write itself failed)
        if os.path.exists(xml_filename):
            os.remove(xml_filename)
def test_parse_xml(self):
    # Verify that parse_xml() can read back a file written by ElementTree.
    # The temporary file is removed in a "finally" clause so that a failure
    # while writing or parsing does not leave "input.xml" behind.
    import os
    import sys
    utest_path = sys.path[0] + '/'
    xml_filename = utest_path + "input.xml"
    # Create XML tree
    element = Element("LexicalEntry")
    SubElement(element, "Lemma")
    tree = ElementTree(element)
    try:
        # Write tree then parse
        tree.write(xml_filename)
        parse_xml(xml_filename)
    finally:
        # Remove XML file (it may be missing if the write itself failed)
        if os.path.exists(xml_filename):
            os.remove(xml_filename)
def test_handle_fv(self):
    # Verify handle_fv() for both vernacular notations: "fv:" is expected to
    # mark the next word only, "|fv{...}" everything between the braces.
    # Each case lists: input value, text of the first span, tail of the first span.
    cases = (
        ("fv:something here and fv:there", "something", " here and "),
        ("|fv{something here} and fv:there", "something here", " and "),
    )
    for raw, first_text, first_tail in cases:
        source = Element("name", val=unicode(raw))
        # Expected element: empty leading text and two "vernacular" spans
        expected = Element("name", val=unicode(raw))
        expected.text = ""
        first = SubElement(expected, "span", {"class": "vernacular"})
        first.text = first_text
        first.tail = first_tail
        # The second span is the same for both inputs
        second = SubElement(expected, "span", {"class": "vernacular"})
        second.text = "there"
        second.tail = ""
        self.assertEqual(tostring(handle_fv(source)), tostring(expected))
def test_handle_tones(self):
    # Verify handle_tones() for three kinds of "att" value: "tone", "lexeme"
    # and any other one (for which the element is expected to stay unchanged).

    ## Test "tone": "a" and "1" are expected inside <sub> elements
    tone = u"LaM1H"
    source = Element("name", att="tone", val=tone)
    expected = Element("name", att="tone", val=tone)
    expected.text = "L"
    for sub_text, sub_tail in (("a", "M"), ("1", "H")):
        node = SubElement(expected, "sub")
        node.text = sub_text
        node.tail = sub_tail
    self.assertEqual(tostring(handle_tones(source)), tostring(expected))

    ## Test "lexeme": only the final "c" is expected inside a <sub> element
    lexeme = "aa˩abb˧bcc˥c".decode(encoding=ENCODING)
    source = Element("name", att="lexeme", val=lexeme)
    expected = Element("name", att="lexeme", val=lexeme)
    expected.text = "aa˩abb˧bcc˥".decode(encoding=ENCODING)
    SubElement(expected, "sub").text = "c"
    self.assertEqual(tostring(handle_tones(source)), tostring(expected))

    ## Test others: same value, but no text nor sub-element is expected
    source = Element("name", att="other", val=lexeme)
    expected = Element("name", att="other", val=lexeme)
    self.assertEqual(tostring(handle_tones(source)), tostring(expected))
def test_handle_font(self):
    # Verify handle_font(): every "{...}" group of the "val" attribute is
    # expected inside its own <span class="ipa"> element, the surrounding
    # text being kept as element text / span tails.
    raw = unicode("blaA{bla1} blaB {bla2}blaC {bla3}")
    source = Element("name", val=raw)
    # Expected element: leading text, then one span per braced group
    expected = Element("name", val=raw)
    expected.text = "blaA"
    spans = (("bla1", " blaB "), ("bla2", "blaC "), ("bla3", ""))
    for span_text, span_tail in spans:
        span = SubElement(expected, "span", {"class": "ipa"})
        span.text = span_text
        span.tail = span_tail
    self.assertEqual(tostring(handle_font(source)), tostring(expected))
def test_handle_fn(self):
    # Verify handle_fn() for both national notations: "fn:" is expected to
    # mark the next word only, "|fn{...}" everything between the braces.
    # Each case lists: input value, text of the second span, tail of the second span.
    cases = (
        ("textfn:this fn:but not this", "but", " not this"),
        ("textfn:this |fn{and this}", "and this", ""),
    )
    for raw, second_text, second_tail in cases:
        source = Element("name", val=unicode(raw))
        # Expected element: leading text and two "national" spans
        expected = Element("name", val=unicode(raw))
        expected.text = "text"
        # The first span is the same for both inputs
        first = SubElement(expected, "span", {"class": "national"})
        first.text = "this"
        first.tail = " "
        second = SubElement(expected, "span", {"class": "national"})
        second.text = second_text
        second.tail = second_tail
        self.assertEqual(tostring(handle_fn(source)), tostring(expected))