def check_attr_type(val, typ, msg):
    """! @brief Check that attribute value is of specified type.
    @param val The attribute value to check.
    @param typ The allowed Python type(s): simple, or Python set or list.
    @param msg The message to display if value is not of correct type.
    """
    actual_type = type(val)
    # 'typ' may be a collection of allowed types, or a single allowed type
    if type(typ) in (set, list):
        mismatch = actual_type not in typ
    else:
        mismatch = actual_type is not typ
    if mismatch:
        print(Warning(msg))
def format_part_of_speech(lexical_entry, font, mapping=partOfSpeech_tex, language=None):
    """! @brief Display part of speech in LaTeX format.
    @param lexical_entry The current Lexical Entry LMF instance.
    @param font A Python dictionary giving the vernacular, national, regional fonts to apply to a text in LaTeX format.
    @param mapping A Python dictionary giving the mapping between LMF part of speech LexicalEntry attribute value and LaTeX layout.
    @param language Language to consider to display part of speech.
    @return A string representing part of speech in LaTeX format.
    """
    result = ""
    if lexical_entry.get_partOfSpeech() is not None:
        try:
            # Mapping key is either the bare value, or a (language, value) pair
            if language is None:
                result += "\\textit{" + mapping[lexical_entry.get_partOfSpeech()] + "}. "
            else:
                result += "\\textit{" + mapping[(language, lexical_entry.get_partOfSpeech())] + "}. "
        except KeyError:
            # Warn instead of raising so the remaining entries are still formatted
            print(Warning("Part of speech value '%s' encountered for lexeme '%s' is not defined in configuration" % (lexical_entry.get_partOfSpeech().encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING))))
    return result
def check_lx(lexical_entry, lx_tmp):
    """! @brief Check that the generated 'lx' field matches the lexeme of the lexical entry.
    If not, display a Warning message.
    @param lexical_entry The current Lexical Entry LMF instance.
    @param lx_tmp The generated lexeme to compare against.
    """
    if lexical_entry.get_lexeme() != lx_tmp:
        print(Warning("Lexeme '%s' generated for lexical entry '%s' is not consistant." % (lx_tmp.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING))))
def check_duration_format(duration):
    """! @brief Verify that duration format is composed as follows: PTxxHxxMxxS (ISO 8601: 'P' for Period).
    If not, display a Warning message.
    @param duration Duration to check.
    """
    import re
    # Hours 00-29, minutes and seconds 00-59 (raw string avoids invalid escape sequences)
    if not re.match(r"^PT[0-2]\dH[0-5]\dM[0-5]\dS$", duration):
        print(Warning("Duration must be formatted as follows: PTxxHxxMxxS (given duration is %s)" % duration.encode(ENCODING)))
def check_time_format(time):
    """! @brief Verify that time format is composed as follows: THH:MM:SS,MSMS (ISO 8601: 'T' for Time).
    If not, display a Warning message.
    @param time Time to check.
    """
    import re
    # Hours 00-29, minutes/seconds 00-59, optional comma-separated milliseconds (raw string avoids invalid escapes)
    if not re.match(r"^T[0-2]\d:[0-5]\d:[0-5]\d(\,\d+|)$", time):
        print(Warning("Time must be formatted as follows: THH:MM:SS,MSMS (given time is %s)" % time.encode(ENCODING)))
def check_date_format(date):
    """! @brief Verify that date format is composed as follows: YYYY-MM-DD (ISO 8601).
    If not, display a Warning message.
    @param date Date to check.
    """
    import re
    # Month 00-19, day 00-39 (coarse check only; raw string avoids invalid escapes)
    if not re.match(r"^\d{4}-[01]\d-[0-3]\d$", date):
        print(Warning("Date must be formatted as follows: YYYY-MM-DD (given date is %s)" % date.encode(ENCODING)))
def check_se(lexical_entry, se_tmp):
    """! @brief Check that the generated subentry ('se' field) is consistent with the lexical entry's related forms.
    If not, display a Warning message.
    @param lexical_entry The current Lexical Entry LMF instance.
    @param se_tmp The generated subentry to compare against.
    """
    ok = False
    for form in lexical_entry.find_related_forms(mdf_semanticRelation["se"]):
        if form == se_tmp:
            ok = True
    if not ok:
        print(Warning("Subentry '%s' generated for lexical entry '%s' is not consistant." % (se_tmp.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING))))
def check_nep(lexical_entry, nep):
    """! @brief Check that the generated Devanagari citation form ('nep' field) is consistent with the lexical entry.
    If not, display a Warning message.
    @param lexical_entry The current Lexical Entry LMF instance.
    @param nep The generated citation form to compare against.
    """
    ok = False
    for form in lexical_entry.get_citation_forms(script_name="devanagari"):
        if form == nep:
            ok = True
    if not ok:
        print(Warning("Citation form '%s' of lexical entry '%s' is not consistant with generated one." % (nep.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING))))
def check_attr_range(value, range, msg, mapping=None):
    """! @brief Check that attribute value is in specified range.
    @param value The attribute value to check.
    @param range A Python set giving the range of allowed values.
    @param msg The message to display if value is out-of-range.
    @param mapping A Python dictionary giving mapping between values (i.e. from MDF to LMF)
    @return The value to set, or None if out-of-range.
    """
    # An in-range value is returned untouched
    if value in range:
        return value
    # Out-of-range: try converting through the mapping, if one was provided
    if mapping is not None:
        try:
            return mapping[value]
        except KeyError:
            print(Warning(msg))
            return None
    print(Warning(msg))
    return None
def compare_sd(x, y):
    """Compare 2 semantic domains between each other.
    @param x The first semantic domain.
    @param y The second semantic domain.
    @return -1 if x ranks before y (or cannot be compared), 0 if equal, 1 if x ranks after y.
    """
    try:
        # Both equal => do nothing
        if sd_order[x] == sd_order[y]:
            return 0
        # If the 1st one is lower than the 2nd one, its rank is decremented
        if sd_order[x] < sd_order[y]:
            return -1
        # If the 1st one is greater than the 2nd one, its rank is incremented
        elif sd_order[x] > sd_order[y]:
            return 1
    except KeyError:
        # Unknown semantic domain: warn and rank it first by convention
        print(Warning("Cannot compare " + x.encode(ENCODING) + " and " + y.encode(ENCODING)))
        return -1
def compare(x, y):
    """Compare 2 elements between each other.
    Uses the module-level 'sort_order' (either a callable or a dictionary of ranks);
    characters unknown to 'sort_order' fall back to plain character comparison.
    @return -1 if x sorts before y, 0 if equal, 1 if x sorts after y.
    """
    # Before comparing, remove acute accents from strings if any
    x = x.replace(u"\u0301", '').replace(u"\u0302", '')
    y = y.replace(u"\u0301", '').replace(u"\u0302", '')
    for i in range(min(len(x), len(y))):
        try:
            if type(sort_order) is not type(dict()):
                # 'sort_order' is a callable giving the rank of a character
                if sort_order(x[i]) == sort_order(y[i]):
                    continue
                # If the 1st one is lower than the 2nd one, its rank is decremented
                if sort_order(x[i]) < sort_order(y[i]):
                    return -1
                # If the 1st one is greater than the 2nd one, its rank is incremented
                elif sort_order(x[i]) > sort_order(y[i]):
                    return 1
            else:
                if sort_order[x[i]] == sort_order[y[i]]:
                    continue
                # If the 1st one is lower than the 2nd one, its rank is decremented
                if sort_order[x[i]] < sort_order[y[i]]:
                    return -1
                # If the 1st one is greater than the 2nd one, its rank is incremented
                elif sort_order[x[i]] > sort_order[y[i]]:
                    return 1
        # Handle other characters
        except KeyError:
            if options.verbose:
                print(Warning("Cannot compare " + x[i].encode(ENCODING) + " and " + y[i].encode(ENCODING)))
            if x[i] == y[i]:
                continue
            if x[i] < y[i]:
                return -1
            elif x[i] > y[i]:
                return 1
    # If both strings do not have the same length, they do not equal => the smallest string is the shortest one
    if len(x) < len(y):
        return -1
    elif len(x) > len(y):
        return 1
    # If all characters match, both equal => do nothing
    return 0
def compare(x, y):
    """Compare 2 elements between each other.
    When two lexical entries share the same item and satisfy 'condition', they are
    ordered by homonym number (missing numbers rank as 0); a duplicated homonym
    number triggers a Warning.
    @return -1 if x ranks before y, 1 if after, 0 otherwise.
    """
    if items(x) == items(y) and condition(x):
        # Classify similar entries by homonym number
        nb_x = x.get_homonymNumber()
        if nb_x is None:
            nb_x = 0
        nb_y = y.get_homonymNumber()
        if nb_y is None:
            nb_y = 0
        # If the 1st one is lower than the 2nd one, its rank is decremented
        if nb_x < nb_y:
            return -1
        # If the 1st one is greater than the 2nd one, its rank is incremented
        elif nb_x > nb_y:
            return 1
        else:
            print(Warning("Several lexical entries '%s' exist. Please solve this issue by specifying the homonym number." % items(x).encode(ENCODING)))
    # Do nothing
    return 0
def setUp(self):
    """Build the Warning fixture shared by the test methods."""
    message = "This is a warning."
    self.warning = Warning(message)
def compare_lx(x, y):
    """Compare 2 lexemes syllable by syllable.
    Each lexeme is repeatedly decomposed into (initial, rime, tone, remainder) with a
    regular expression built from the module-level 'initials', 'rimes' and 'tones'
    alphabets, then compared through the module-level 'sort_order' ranking.
    At most 5 syllables are compared.
    @return -1 if x sorts before y, 1 if after, 0 if equal or not comparable.
    """
    unknown = set(["xxxx", "???", ""])
    cmp_x = x
    cmp_y = y
    pattern = "^([" + initials.replace('j', '').replace('w', '') + "]{0,3})([" + rimes + "]{1,2})#?([" + tones + "]{0,2})[$#]?[123]?(.*)$"
    n = 5
    while (n > 0):
        initial_x = ""
        rime_x = ""
        tone_x = ""
        initial_y = ""
        rime_y = ""
        tone_y = ""
        char_x = []
        char_y = []
        # Decompose the remainder of x into (initial, rime, tone, rest)
        found = re.match(pattern, cmp_x)
        if found is None:
            if len(cmp_x) == 1:
                # A single leftover character is either an initial or a rime
                if cmp_x in initials:
                    initial_x = cmp_x
                    rime_x = ""
                elif cmp_x in rimes:
                    initial_x = ""
                    rime_x = cmp_x
                tone_x = ""
                cmp_x = ""
            else:
                if cmp_x not in unknown:
                    print(Warning("Cannot sort " + cmp_x.encode(ENCODING)))
                return 1
        else:
            initial_x = found.group(1)
            rime_x = found.group(2)
            tone_x = found.group(3)
            cmp_x = found.group(4)
        # Before comparing, handle combining tilde of 'ɻ̃' if any
        if rime_x == u"\u0303":
            initial_x += rime_x
            rime_x = ""
        # Decompose the remainder of y into (initial, rime, tone, rest)
        found = re.match(pattern, cmp_y)
        if found is None:
            if len(cmp_y) == 1:
                if cmp_y in initials:
                    initial_y = cmp_y
                    rime_y = ""
                elif cmp_y in rimes:
                    initial_y = ""
                    rime_y = cmp_y
                tone_y = ""
                cmp_y = ""
            else:
                if cmp_y not in unknown:
                    print(Warning("Cannot sort " + cmp_y.encode(ENCODING)))
                return -1
        else:
            initial_y = found.group(1)
            rime_y = found.group(2)
            tone_y = found.group(3)
            cmp_y = found.group(4)
        # Before comparing, handle combining tilde of 'ɻ̃' if any
        if rime_y == u"\u0303":
            initial_y += rime_y
            rime_y = ""
        if len(initial_x) != 0:
            char_x.append(initial_x)
        if len(rime_x) != 0:
            char_x.append(rime_x)
        if len(initial_y) != 0:
            char_y.append(initial_y)
        if len(rime_y) != 0:
            char_y.append(rime_y)
        try:
            # An empty decomposition sorts first
            try:
                char_x[0]
            except IndexError:
                return -1
            try:
                char_y[0]
            except IndexError:
                return 1
            # If the 1st one is lower than the 2nd one, its rank is decremented
            if sort_order[char_x[0]] < sort_order[char_y[0]]:
                return -1
            # If the 1st one is greater than the 2nd one, its rank is incremented
            elif sort_order[char_x[0]] > sort_order[char_y[0]]:
                return 1
            else:
                # sort_order[char_x[0]] == sort_order[char_y[0]]
                single = False
                try:
                    char_x[1]
                except IndexError:
                    single = True
                try:
                    char_y[1]
                except IndexError:
                    single = True
                if not single:
                    # If the 1st one is lower than the 2nd one, its rank is decremented
                    if sort_order[char_x[1]] < sort_order[char_y[1]]:
                        return -1
                    # If the 1st one is greater than the 2nd one, its rank is incremented
                    elif sort_order[char_x[1]] > sort_order[char_y[1]]:
                        return 1
                # sort_order[char_x[1]] == sort_order[char_y[1]]
                # If the 1st one is lower than the 2nd one, its rank is decremented
                if sort_order[tone_x] < sort_order[tone_y]:
                    return -1
                # If the 1st one is greater than the 2nd one, its rank is incremented
                elif sort_order[tone_x] > sort_order[tone_y]:
                    return 1
                else:
                    # sort_order[tone_x] == sort_order[tone_y]
                    if cmp_x == "":
                        return -1
                    if cmp_y == "":
                        return 1
                    n -= 1
                    if n == 0:
                        # If all characters match, both equal => do nothing
                        return 0
        except KeyError:
            print(Warning("Cannot compare " + x.encode(ENCODING) + " and " + y.encode(ENCODING)))
            return 0
def tex_write(object, filename, preamble=None, introduction=None, lmf2tex=lmf_to_tex, font=None, items=lambda lexical_entry: lexical_entry.get_lexeme(), sort_order=None, paradigms=[], tables=[], title=None, tex_language=None, tex_other_languages=[]):
    """! @brief Write a LaTeX file.
    Note that the lexicon must already be ordered at this point. Here, parameters 'items' and 'sort_order' are only used to define chapters.
    @param object The LMF instance to convert into LaTeX output format.
    @param filename The name of the LaTeX file to write with full path, for instance 'user/output.tex'.
    @param preamble The name of the LaTeX file with full path containing the LaTeX header of the document, for instance 'user/config/japhug.tex'. Default value is None.
    @param introduction The name of the LaTeX file with full path containing the LaTeX introduction of the document, for instance 'user/config/introduction.tex'. Default value is None.
    @param lmf2tex A function giving the mapping from LMF representation information that must be written to LaTeX commands, in a defined order. Default value is 'lmf_to_tex' function defined in 'pylmflib/config/tex.py'. Please refer to it as an example.
    @param font A Python dictionary giving the vernacular, national, regional fonts to apply to a text in LaTeX format.
    @param items Lambda function giving the item to sort. Default value is 'lambda lexical_entry: lexical_entry.get_lexeme()', which means that the items to sort are lexemes.
    @param sort_order Default value is 'None', which means that the LaTeX output is alphabetically ordered.
    @param paradigms A Python list of LaTeX filenames with full path containing the paradigms in LaTeX format. Default value is an empty list.
    @param tables A Python list of LaTeX filenames with full path containing tables to add at the end of the LaTeX document. Default value is an empty list.
    @param title A Python string containing the title of the LaTeX document. Default value is None.
    @param tex_language A Python string giving the default language to set in LaTeX.
    @param tex_other_languages A Python list of additional language names to declare to XeLaTeX. Default value is an empty list.
    """
    import string, os
    # Define font
    if font is None:
        font = pylmflib.config.xml.font
    tex_file = open_write(filename)
    # Add file header if any
    tex_file.write(file_read(preamble))
    # Continue the header if needed
    if title is not None:
        tex_file.write("\\title{" + title + "}" + EOL)
    if tex_language is not None:
        tex_file.write("\setdefaultlanguage{" + tex_language + "}" + EOL)
    if tex_other_languages is not None:
        # Added by Benjamin to properly handle several languages in the XeLaTeX file.
        tex_file.write("\setotherlanguages{" + ", ".join(tex_other_languages) + "}" + EOL)
    # Insert LaTeX commands to create a document
    tex_file.write(EOL + "\\begin{document}" + EOL)
    tex_file.write("\\maketitle" + EOL)
    tex_file.write("\\newpage" + EOL)
    # Add introduction if any
    if introduction is not None:
        tex_file.write("\\markboth{INTRODUCTION}{}" + EOL * 2)
        tex_file.write(file_read(introduction))
    # Add command for small caps
    tex_file.write(EOL + "\\def\\mytextsc{\\bgroup\\obeyspaces\\mytextscaux}" + EOL)
    tex_file.write("\\def\\mytextscaux#1{\\mytextscauxii #1\\relax\\relax\\egroup}" + EOL)
    tex_file.write("\\def\\mytextscauxii#1{%" + EOL)
    tex_file.write("\\ifx\\relax#1\\else \\ifcat#1\\@sptoken{} \\expandafter\\expandafter\\expandafter\\mytextscauxii\\else" + EOL)
    tex_file.write("\\ifnum`#1=\\uccode`#1 {\\normalsize #1}\\else {\\footnotesize \\uppercase{#1}}\\fi \\expandafter\\expandafter\\expandafter\\mytextscauxii\\expandafter\\fi\\fi}" + EOL * 2)
    # Configure space indent
    tex_file.write("\\setlength\\parindent{0cm}" + EOL)
    # Insert data path configuration
    # Unix-style paths
    audio_path = pylmflib.config.xml.audio_path
    graphic_path = os.path.abspath('.')
    if os.name != 'posix':
        # Windows-style paths
        audio_path = audio_path.replace("\\", "/")
        graphic_path = graphic_path.replace("\\", "/")
    tex_file.write(EOL + "\\addmediapath{" + audio_path.rstrip("/") + "}" + EOL)
    tex_file.write("\\addmediapath{" + audio_path + "mp3}" + EOL)
    tex_file.write("\\addmediapath{" + audio_path + "wav}" + EOL)
    tex_file.write("\\graphicspath{{" + graphic_path + "/pylmflib/output/img/}}" + EOL * 2)
    # Configure 2 columns
    tex_file.write("\\newpage" + EOL)
    tex_file.write("\\begin{multicols}{2}" + EOL * 2)
    if sort_order is None:
        # Lowercase and uppercase letters must have the same rank
        sort_order = dict([(c, ord(c)) for c in string.lowercase])
        up = dict([(c, ord(c) + 32) for c in string.uppercase])
        sort_order.update(up)
        sort_order.update({'': 0, ' ': 0})
    # For each element to write, get the corresponding LMF value
    if object.__class__.__name__ == "LexicalResource":
        for lexicon in object.get_lexicons():
            previous_character = ''
            current_character = ''
            # Lexicon is already ordered
            for lexical_entry in lexicon.get_lexical_entries():
                # Consider only main entries (subentries and components will be written as parts of the main entry)
                if lexical_entry.find_related_forms("main entry") == [] and lexical_entry.get_independentWord() is not False:
                    # Check if current element is a lexeme starting with a different character than previous lexeme
                    try:
                        current_character = items(lexical_entry)[0]
                        # Prefer multi-character section keys when they exist in sort_order
                        if sort_order[items(lexical_entry)[0:1]]:
                            current_character = items(lexical_entry)[0:1]
                        if sort_order[items(lexical_entry)[0:2]]:
                            current_character = items(lexical_entry)[0:2]
                    except IndexError:
                        pass
                    except KeyError:
                        pass
                    except TypeError:
                        pass
                    try:
                        # A new section starts when the rank of the current character differs from the previous one
                        if ( (type(sort_order) is not type(dict())) and ((previous_character == '') or (sort_order(current_character) != sort_order(previous_character))) ) \
                           or ( (type(sort_order) is type(dict())) and (int(sort_order[current_character]) != int(sort_order[previous_character])) ):
                            # Do not consider special characters
                            previous_character = current_character
                            tex_file.write("\\newpage" + EOL)
                            title = ''
                            if type(sort_order) is not type(dict()):
                                title += ' ' + font[NATIONAL](current_character)
                            else:
                                # Gather every key sharing the current rank into the section title
                                for key, value in sorted(sort_order.items(), key=lambda x: x[1]):
                                    if int(value) == int(sort_order[current_character]):
                                        title += ' ' + font[VERNACULAR](key)
                            tex_file.write("\\section*{\\centering-" + handle_reserved(title) + " -}" + EOL)
                            #tex_file.write("\\pdfbookmark[1]{" + title + " }{" + title + " }" + EOL)
                        tex_file.write(lmf2tex(lexical_entry, font))
                        if len(paradigms) != 0:
                            tex_file.write(insert_references(lexical_entry))
                        tex_file.write("\\lhead{\\firstmark}" + EOL)
                        tex_file.write("\\rhead{\\botmark}" + EOL)
                        # Separate lexical entries from each others with a blank line
                        tex_file.write(EOL)
                        # Handle subentries
                        for related_form in lexical_entry.get_related_forms("subentry"):
                            if related_form.get_lexical_entry() is not None:
                                tex_file.write(lmf2tex(related_form.get_lexical_entry(), font))
                                if len(paradigms) != 0:
                                    tex_file.write(insert_references(related_form.get_lexical_entry()))
                                # Separate sub-entries from each others with a blank line
                                tex_file.write(EOL)
                    except KeyError:
                        print Warning("Cannot sort item %s" % items(lexical_entry).encode(ENCODING))
                    except IndexError:
                        # Item is an empty string
                        pass
    else:
        raise OutputError(object, "Object to write must be a Lexical Resource.")
    # Insert LaTeX commands to finish the document properly
    tex_file.write("\end{multicols}" + EOL)
    # Insert paradigms if any
    for filename in paradigms:
        tex_file.write(EOL)
        tex_file.write("\\newpage" + EOL)
        tex_file.write("\markboth{paradigms}{}" + EOL)
        tex_file.write(file_read(filename))
        tex_file.write(EOL)
    # Insert other tables if any
    for filename in tables:
        tex_file.write(EOL)
        tex_file.write("\\newpage" + EOL)
        tex_file.write(file_read(filename))
        tex_file.write(EOL)
    tex_file.write("\end{document}" + EOL)
    tex_file.close()
def format_audio(lexical_entry, font):
    """! @brief Embed sound file into PDF.
    @param lexical_entry The current Lexical Entry LMF instance.
    @param font A Python dictionary giving the vernacular, national, regional fonts to apply to a text in LaTeX format.
    @return A string embedding sound in LaTeX format.
    """
    import os
    from os.path import basename, isfile
    # To access options
    from pylmflib import options
    global options
    result = ""
    if not options.audio:
        return result
    for form_representation in lexical_entry.get_form_representations():
        if form_representation.get_audio() is not None:
            # Embed local sound file
            # \includemedia[<options>]{<poster text>}{<main Flash (SWF) file or URL | 3D (PRC, U3D) file>}
            # To include audio file in PDF, replace WAV extension by MP3 extension and search in audio, MP3 and WAV folders
            file_name = form_representation.get_audio().get_fileName().replace(".wav", ".mp3")
            file_path = []
            if os.name == 'posix':
                # Unix-style paths
                file_path.append(pylmflib.config.xml.audio_path + file_name)
                file_path.append(pylmflib.config.xml.audio_path + "mp3/" + file_name)
                file_path.append(pylmflib.config.xml.audio_path + "wav/" + file_name)
            else:
                # Windows-style paths
                audio_path = pylmflib.config.xml.audio_path.replace("/", "\\")
                file_path.append(audio_path + file_name)
                file_path.append(audio_path + "mp3\\" + file_name)
                file_path.append(audio_path + "wav\\" + file_name)
            exist = False
            for audio_file in file_path:
                if isfile(audio_file):
                    exist = True
                    break
            if not exist:
                print(Warning("Sound file '%s' encountered for lexeme '%s' does not exist" % (file_name.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING))))
                return result
            # Escape hyphens for LaTeX
            file_name = file_name.replace('-', '\string-')
            result += "\includemedia[" + EOL +\
                "\taddresource=" + file_name + "," + EOL +\
                "\tflashvars={" + EOL +\
                "\t\tsource=" + file_name + EOL +\
                "\t\t&autoPlay=true" + EOL +\
                "\t\t&autoRewind=true" + EOL +\
                "\t\t&loop=false" + EOL +\
                "\t\t&hideBar=true" + EOL +\
                "\t\t&volume=1.0" + EOL +\
                "\t\t&balance=0.0" + EOL +\
                "}]{\includegraphics[scale=0.5]{sound.jpg}}{APlayer.swf}"
            # \mediabutton[<options>]{<normal button text or graphic>}
            result += " \\hspace{0.1cm}" + EOL
    return result
def mdf_read(filename=None, mdf2lmf=mdf_lmf, lexicon=None, id=None, encoding=ENCODING):
    """! @brief Read an MDF file.
    @param filename The name of the MDF file to read with full path, for instance 'user/input.txt'.
    @param mdf2lmf A Python dictionary describing the mapping between MDF markers and LMF representation. Default value is 'mdf_lmf' dictionary defined in 'pylmflib/config/mdf.py'. Please refer to it as an example.
    @param lexicon An existing Lexicon to fill with lexical entries to read.
    @param id A Python string identifying the lexicon to create.
    @param encoding Use 'utf-8' encoding by default. Otherwise, user has to precise the native encoding of its document.
    @return A Lexicon instance containing all lexical entries.
    """
    import re
    # If not provided, create a Lexicon instance to contain all lexical entries
    if lexicon is None:
        lexicon = Lexicon(id)
    # If no filename is given, fall back to the lexicon's recorded entry source
    if filename is None:
        filename = lexicon.get_entrySource()
    else:
        # Set lexicon attribute
        lexicon.set_entrySource(filename)
    # Read in unicode
    mdf_file = open_read(filename, encoding=encoding)
    # MDF syntax is the following: '\marker value'
    mdf_pattern = """^\\\(\w*) (<(.*)>)? ?(.*)$"""
    # Parser state: the entry currently being filled, plus pending subentry/component/main entry
    current_entry = None
    sub_entry = None
    component = None
    main_entry = None
    # Add each lexical entry to the lexicon
    for line in mdf_file.readlines():
        # Do not parse empty lines
        if line != EOL:
            result = re.match(mdf_pattern, line)
            if result is None:
                # Line is empty => continue parsing next line
                continue
            marker = result.group(1)
            attrs = result.group(3)
            value = result.group(4)
            # Do not consider markers starting with an underscore character (e.g. '_sh' and '_DateStampHasFourDigitYear')
            if marker[0] == '_':
                continue
            # Remove trailing spaces and end-of-line characters
            value = value.rstrip(' \r\n')
            # Do not consider empty fields
            if value == "":
                continue
            # Check if the current entry is a multiword expression
            is_mwe = False
            if marker == "lf":
                lf = value.split(" = ")
                if lf[0].startswith("Component"):
                    component_nb = lf[0].lstrip("Component")
                    value = lf[1]
                    is_mwe = True
            # 'lx' and 'se' markers indicate a new entry
            if marker == "lx" or marker == "se" or is_mwe:
                # Compute a unique identifier
                uid = uni2sampa(value)
                if marker == "se":
                    # Create a subentry
                    sub_entry = LexicalEntry(uid)
                    # An MDF subentry corresponds to an LMF lexical entry
                    mdf2lmf["lx"](value, sub_entry)
                    # Add it to the lexicon
                    lexicon.add_lexical_entry(sub_entry)
                    # Manage main entry
                    if main_entry is None:
                        main_entry = current_entry
                    else:
                        current_entry = main_entry
                    # Set main entry
                    homonym_nb = current_entry.get_homonymNumber()
                    if homonym_nb is None:
                        homonym_nb = ""
                    sub_entry.create_and_add_related_form(current_entry.get_lexeme() + homonym_nb, "main entry")
                elif is_mwe:
                    # Create a subentry
                    component = LexicalEntry(uid)
                    # An MDF subentry corresponds to an LMF lexical entry
                    mdf2lmf["lx"](value, component)
                    # Add it to the lexicon
                    lexicon.add_lexical_entry(component)
                    # Manage current entry
                    if sub_entry is not None:
                        current_entry = sub_entry
                    # Set component
                    homonym_nb = current_entry.get_homonymNumber()
                    if homonym_nb is None:
                        homonym_nb = ""
                    current_entry.create_and_add_component(component_nb, value)
                    component.create_and_add_related_form(current_entry.get_lexeme() + homonym_nb, "complex predicate")
                    component.set_independentWord(False)
                else:
                    # Create a new entry
                    current_entry = LexicalEntry(uid)
                    # Add it to the lexicon
                    lexicon.add_lexical_entry(current_entry)
                    # Reset main entry
                    main_entry = None
            # Map MDF marker and value to LMF representation
            try:
                if attrs is not None:
                    # There are attributes
                    attributes = {}
                    # Remove quotation marks from attributes if any
                    attrs = attrs.replace('"', '')
                    for attr in attrs.split(' '):
                        attributes.update({attr.split('=')[0]: attr.split('=')[1]})
                    # A customized marker starts with '__' characters
                    mdf2lmf["__" + marker](attributes, value, current_entry)
                else:
                    mdf2lmf[marker](value, current_entry)
                # Restore parsing context: markers after a subentry/component apply to it once, then revert
                if sub_entry is not None:
                    current_entry = sub_entry
                    sub_entry = None
                if component is not None:
                    sub_entry = current_entry
                    current_entry = component
                    component = None
            except KeyError:
                # When printing, we need to convert 'unicode' into 'str' using 'utf-8' encoding:
                print Warning("MDF marker '%s' encountered for lexeme '%s' is not defined in configuration" % (marker.encode(ENCODING), current_entry.get_lexeme().encode(ENCODING)))
            except Error as exception:
                exception.handle()
    mdf_file.close()
    return lexicon
def check_cross_references(self):
    """! @brief Check all cross-references in the lexicon.
    Fill the private attribute '__lexicalEntry' of each RelatedForm instance for all lexical entries.
    @return Lexicon instance.
    """
    import os
    from string import digits
    if self.__checked:
        return self
    # Verifiy cross references only once
    self.__checked = True
    for lexical_entry in self.get_lexical_entries():
        for related_form in lexical_entry.get_related_forms():
            # From RelatedForm targets attribute, retrieve the pointed LexicalEntry instance
            related_lexeme = related_form.get_lexeme()
            # Check if there is an homonym number at the end of the related lexeme
            related_homonym_number = None
            if related_lexeme[-1] in digits:
                related_homonym_number = related_lexeme[-1]
                related_lexeme = related_lexeme[:-1]
            found_entry = self.find_lexical_entries(lambda lexical_entry: lexical_entry.get_lexeme() == related_lexeme)
            # Remove duplicate subentries from check if any
            if len(found_entry) == 2:
                if found_entry[0].is_subentry() and not found_entry[1].is_subentry():
                    # Keep only the first subentry
                    found_entry = found_entry[:1]
                elif not found_entry[0].is_subentry() and found_entry[1].is_subentry():
                    # Keep only the second subentry
                    found_entry = found_entry[1:]
            if len(found_entry) < 1:
                # No lexical entry with this lexeme exists
                print Warning("Lexical entry '%s' does not exist. Please solve this issue by checking the related form of lexical entry '%s'." % (related_lexeme.encode(ENCODING), lexical_entry.get_lexeme().encode(ENCODING)))
            elif len(found_entry) > 1:
                # Several lexical entries with this lexeme exist => consider homonym number if any
                related_homonym = []
                if related_homonym_number is not None:
                    for related_entry in found_entry:
                        if related_entry.get_homonymNumber() == related_homonym_number:
                            related_homonym.append(related_entry)
                if len(related_homonym) != 1:
                    print Warning("Several lexical entries '%s' exist. Please solve this issue by renaming lexical entries correctly or by specifying the homonym number." % related_lexeme.encode(ENCODING))
                else:
                    # Save the found lexical entry
                    related_form.set_lexical_entry(related_homonym[0])
            else:
                # Save the found lexical entry
                related_form.set_lexical_entry(found_entry[0])
        for component in lexical_entry.get_components():
            # From Component targets attribute, retrieve the pointed LexicalEntry instance
            found_entries = self.find_lexical_entries(lambda lexical_entry: lexical_entry.get_lexeme() == component.get_lexeme())
            for found_entry in found_entries:
                # Check that the found entry is a component
                if found_entry.is_component():
                    # Save the found lexical entry
                    component.set_lexical_entry(found_entry)
                    break
    return self