def format_lexc_xml(wordmap):
    """Render one lexicon entry of *wordmap* as an ``<e>`` XML element.

    Reads the ``lemma``, ``pos``, ``is_proper``, ``is_suffix``, ``is_prefix``,
    ``stub`` and ``new_paras`` keys. The lemma and stub are XML-escaped, after
    which ``|`` and ``_`` are replaced by the ``wb`` and ``mb`` symbol
    elements. Returns the entry as a single string.
    """
    def _mark_boundaries(text):
        # Escaping must already have happened: the markup inserted here has
        # to survive unescaped.
        marked = text.replace('|', '<s mcs="wb"/>')
        return marked.replace('_', '<s mcs="mb"/>')

    fragments = [
        _mark_boundaries(xml_escape(wordmap['lemma'])),
        '<s mcs="' + wordmap['pos'] + '"/>',
    ]
    if wordmap['is_proper']:
        fragments.append('<s mcs="proper"/>')
    if wordmap['is_suffix']:
        # The suffix marker goes in front of the whole analysis.
        fragments.insert(0, "<s mcs='suffix'/>")
    if wordmap['is_prefix']:
        # The prefix marker goes at the very end of the analysis.
        fragments.append("<s mcs='prefix'/>")
    analysis = ''.join(fragments)

    surface = _mark_boundaries(xml_escape(wordmap['stub']))
    continuations = " ".join(wordmap['new_paras'])
    return (' <e><a>%s</a><i>%s</i><cont lexica="%s"/></e>'
            % (analysis, surface, continuations))
def format_continuation_lexicon_xml(tsvparts):
    """Render one continuation-lexicon row as an ``<e>`` XML element.

    ``tsvparts[1]`` holds the ``|``-separated analysis pieces, ``tsvparts[2]``
    the surface string and ``tsvparts[3:]`` the continuation lexicon names.
    Pieces found in ``stuff2ftb3`` become ``<s mcs="..."/>`` references; all
    other pieces are emitted as escaped text. Returns the entry string
    terminated by a newline.
    """
    chunks = [' <e>']
    if tsvparts[1] == '':
        # No analysis at all: emit an empty analysis element.
        chunks.append('<a/>')
    else:
        chunks.append('<a>')
        for piece in tsvparts[1].split('|'):
            if piece in stuff2ftb3:
                chunks.append('<s mcs="' + make_xmlid(piece) + '"/>')
            else:
                chunks.append(xml_escape(piece))
        chunks.append('</a>')
    chunks.append("<i>" + xml_escape(tsvparts[2]) + "</i>")
    # '#' is rewritten to _END inside the lexica attribute.
    targets = " ".join(tsvparts[3:]).replace("#", "_END")
    chunks.append('<cont lexica="' + targets + '"/></e>\n')
    return ''.join(chunks)
def format_multichars_lexc_xml():
    """Build the ``<Multichar_Symbols>`` section as one string.

    Every key of ``stuff2ftb3`` whose ``make_xmlid`` form is non-empty yields
    one ``<mcs>`` element: its text is the escaped mapped value, or the id
    itself when the mapped value is empty. A fixed set of special symbols and
    a ``VERSION`` entry are appended before the closing tag.
    """
    parts = [" <Multichar_Symbols>\n"]
    for raw_key, value in stuff2ftb3.items():
        symbol_id = make_xmlid(raw_key)
        if symbol_id == '':
            # Keys that reduce to an empty id produce no element.
            continue
        body = xml_escape(value) if value != '' else symbol_id
        parts.append(" <mcs id='" + symbol_id + "'>" + body + "</mcs>\n")
    # NOTE(review): this literal is reproduced exactly as it appears in the
    # reviewed source; confirm its internal whitespace against the repository.
    parts.append("""<!-- Following specials exist in all versions of omorfi --> <mcs id="hyph">{hyph?}</mcs> <mcs id="deriv">»</mcs> <mcs id="infl">></mcs> <mcs id="wb">|</mcs> """)
    parts.append(" <mcs id='VERSION'>" + version_id_easter_egg + '</mcs>\n')
    parts.append(" </Multichar_Symbols>")
    return "".join(parts)
htmltags = dict([(n, XmlElementProto(n, False)) for n in _non_empty_html_tags] + [(n, XmlElementProto(n, True)) for n in _maybe_empty_html_tags] + [('script', XmlElementProto('script', element_class=Script))]) # I have a separate module that defines the html entity refs. Email # me if you would like a copy. ################################################################################ # 6: Visitors for the xml/html elements, etc. xml_default_visitors_map = default_visitors_map.copy() # o = obj_to_be_walked, w = walker (aka serializer) xml_default_visitors_map.update({ unicode: (lambda o, w: w.emit(xml_escape(o))), XmlName: (lambda o, w: w.emit(unicode(o))), XmlAttributes: (lambda o, w: [w.walk(i) for i in o]), XmlElementProto: (lambda o, w: (w.emit( safe_unicode('<%s />' % o.name) if o.can_be_empty else safe_unicode('<%s></%s>' % (o.name, o.name))))), XmlEntityRef: (lambda o, w: w.emit(safe_unicode('&%s;' % o.alpha))), }) @xml_default_visitors_map.register(XmlElement) def visit_xml_element(elem, walker): walker.emit_many(('<', elem.name)) walker.walk(elem.attrs) walker.emit('>') walker.walk(elem.children)
htmltags = dict( [(n, XmlElementProto(n, False)) for n in _non_empty_html_tags] + [(n, XmlElementProto(n, True)) for n in _maybe_empty_html_tags] + [('script', XmlElementProto('script', element_class=Script))]) # I have a separate module that defines the html entity refs. Email # me if you would like a copy. ################################################################################ # 6: Visitors for the xml/html elements, etc. xml_default_visitors_map = default_visitors_map.copy() # o = obj_to_be_walked, w = walker (aka serializer) xml_default_visitors_map.update({ unicode: (lambda o, w: w.emit(xml_escape(o))), XmlName: (lambda o, w: w.emit(unicode(o))), XmlAttributes: (lambda o, w: [w.walk(i) for i in o]), XmlElementProto: (lambda o, w: ( w.emit(safe_unicode('<%s />'%o.name) if o.can_be_empty else safe_unicode('<%s></%s>'%(o.name, o.name))))), XmlEntityRef: (lambda o, w: w.emit(safe_unicode('&%s;'%o.alpha))), }) @xml_default_visitors_map.register(XmlElement) def visit_xml_element(elem, walker): walker.emit_many(('<', elem.name)) walker.walk(elem.attrs) walker.emit('>') walker.walk(elem.children)
def _writeItem(outputFile, itemId, questionType, questionTitle, questionText):
    """Write one collected question to outputFile as an item element.

    The title and text are XML-escaped (quotes included) before use. Nothing
    is written when createQuestion() returns None for the presentation part.
    """
    questionText = xml_escape(questionText, quote=True)
    questionTitle = xml_escape(questionTitle, quote=True)
    # Debug trace kept from the original implementation (marked "TEST !!!").
    print(itemId + " " + questionType + " " + questionTitle)
    (itempresentationelems,
     itemfeedbackelems,
     itemresprocessingelems) = createQuestion(questionType, questionText)
    if itempresentationelems is None:
        return
    outputFile.write(itemopen % (itemId, questionTitle))
    outputFile.write(itemmetadata % questionType)
    outputFile.write(itempresentationopen % (questionTitle))
    outputFile.write(itempresentationelems)
    outputFile.write(itempresentationclose)
    outputFile.write(itemresprocessingopen)
    outputFile.write(itemresprocessingelems)
    outputFile.write(itemresprocessingclose)
    outputFile.write(itemfeedbackelems)
    outputFile.write(itemclose)


def convertFile(inputFile, outputFile):
    """Convert a plain-text question file to XML written to outputFile.

    Input format, line by line (each line is stripped first):
      * ``[t][s]Title`` / ``[t][m]Title`` / ``[t][g]Title`` starts a question
        of type single choice, multiple choice or gap;
      * lines starting with ``#`` are comments and are skipped;
      * any other non-empty line is question text, where several lines are
        joined with ``<br/>``;
      * an empty line ends the current question.

    The module-level ``header`` is written first and ``footer`` last. The
    item id is derived from the line number of the title line. A question
    that is still open when the input ends is written as well, so input
    without a trailing blank line no longer loses its last question. Prints
    the number of questions found.
    """
    outputFile.write(header)
    inItem = False
    itemId = ""
    questionText = ""
    questionType = ""
    questionTitle = ""
    questionno = 0
    for lineno, line in enumerate(inputFile, 1):
        line = line.strip()
        if line == "":
            if inItem:
                # End of item -> write it out and reset the collected state.
                questionno += 1
                _writeItem(outputFile, itemId, questionType,
                           questionTitle, questionText)
                questionText = ""
                questionType = ""
                questionTitle = ""
                inItem = False
            # Empty lines outside an item are ignored.
            continue
        if line.startswith("#"):
            # Ignore comments.
            continue
        if line[0:3] == "[t]":
            # Title line -> a new question starts here.
            inItem = True
            itemId = "il_0_qst_" + str(lineno)  # id comes from line number
            typeMarker = line[3:6]
            if typeMarker == "[s]":  # multiple choice, single answer
                questionType = questionTypeMCSingle
            elif typeMarker == "[m]":  # multiple choice, multiple answers
                questionType = questionTypeMCMulti
            elif typeMarker == "[g]":  # gap question
                questionType = questionTypeGap
            questionTitle = line[6:].strip()
            continue
        # Question text, possibly spread over several lines. Text seen before
        # any title line is accumulated too, exactly as before.
        if questionText == "":
            questionText = line
        else:
            questionText = questionText + "<br/>" + line
    if inItem:
        # Input ended without a terminating blank line: flush the open item.
        questionno += 1
        _writeItem(outputFile, itemId, questionType,
                   questionTitle, questionText)
    outputFile.write(footer)
    print("Found %d questions.\n" % (questionno))