class Entry:
    """
    This class represents an entry (record) from a Shoebox lexicon. Each
    entry consists of a collection of fields, stored as a special type of
    dictionary which keeps track of the sequence in which its keys were
    entered. Each field marker maps to a list of field values.
    """

    # NOTE(review): this file contains more than one definition of a class
    # named Entry; the definition that appears last is the one bound to the
    # name when the module is imported.

    def __init__(self):
        """
        This method constructs a new Entry object.
        """
        self._fields = SequentialDictionary()
        self._rawText = ""
        self._number = None
        self._subentries = None

    def __str__(self):
        """
        This method defines the string representation of an entry.

        Every value of every field is written on its own line as
        a backslash, the field marker, a space and the value.

        @rtype: string
        @return: an entry as a string in Standard Format
        """
        lines = []
        for fm, fvs in self._fields.items():
            for fv in fvs:
                lines.append("\n\\%s %s" % (fm, fv))
        return "".join(lines)

    def set_raw_text(self, rawText):
        """
        This method sets the raw text from which the Entry object was parsed.

        @param rawText: raw Shoebox text from which entry was parsed
        @type  rawText: string
        """
        self._rawText = rawText

    def get_raw_text(self):
        """
        This method provides access to the raw text from which the Entry
        object was parsed.

        @rtype: string
        @return: the raw text (an empty string if none has been set)
        """
        return self._rawText

    def get_subentries(self):
        """
        This method obtains all of the subentries for an entry.

        @rtype: list of Entry objects
        @returns: all of the subentries of an entry, or None if no subentry
                  has been added
        """
        return self._subentries

    def add_subentry(self, subentry):
        """
        This method adds to an entry a subentry, which is simply another
        Entry object.

        @param subentry: subentry
        @type  subentry: Entry object
        """
        # The list of subentries is created lazily on first use.
        if not self._subentries:
            self._subentries = []
        self._subentries.append(subentry)

    def set_number(self, number):
        """
        This method sets the position of the entry in the dictionary as a
        cardinal number.

        @param number: number of entry
        @type  number: integer
        """
        self._number = number

    def get_number(self):
        """
        This method obtains the position of the entry in the dictionary as a
        cardinal number.

        @rtype: integer
        @return: the number of the entry, or None if it has not been set
        """
        return self._number

    def get_fields(self):
        """
        This method obtains the values of all of the fields found in the
        Entry object.

        @rtype: list
        @return: the stored values, one item per field marker (each item is
                 whatever was stored for that marker, normally a list of
                 field values)
        """
        return self._fields.values()

    def get_field_markers(self):
        """
        This method obtains all of the field markers found in the Entry
        object.

        @return: the field markers of an entry
        @rtype: list
        """
        return self._fields.keys()

    def get_values_by_marker(self, field_marker, sep=None):
        """
        This method is a shorter alias for
        get_field_values_by_field_marker().

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator for field values
        @type  sep: string
        @rtype: string (if sep); otherwise, list of field values
        """
        return self.get_field_values_by_field_marker(field_marker, sep)

    def get_field_values_by_field_marker(self, field_marker, sep=None):
        """
        This method returns all of the field values for a given field marker.
        If sep is set, it will return the values joined into a single string;
        otherwise, it will return the list of field values. Returns None if
        the field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator for field values
        @type  sep: string
        @rtype: string (if sep); otherwise, list of field values
        """
        try:
            values = self._fields[field_marker]
        except KeyError:
            return None
        if sep is None:
            return values
        return sep.join(values)

    def get_field_as_string(self, field_marker, join_string=""):
        """
        This method returns a particular field given a field marker.
        Returns a blank string if field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param join_string: string used to join field values (default to
                            blank string)
        @type  join_string: string
        @rtype: string
        """
        try:
            return join_string.join(self._fields[field_marker])
        except KeyError:
            return ""

    def get_field(self, fieldMarker):
        """
        This method returns a particular field given a field marker.
        Returns None if the field is not found.

        @param fieldMarker: marker of desired field
        @type  fieldMarker: string
        @rtype: Field object
        """
        # Only the lookup is guarded, so that an error raised while building
        # the Field object is not mistaken for a missing marker.
        try:
            values = self._fields[fieldMarker]
        except KeyError:
            return None
        return Field(fieldMarker, values)

    def set_field(self, fieldMarker, field):
        """
        This method sets a field, given a marker and its associated data.
        Any values previously stored for the marker are replaced by a
        one-item list holding the given data.

        @param fieldMarker: field marker to set
        @type  fieldMarker: string
        @param field      : data to associate with the field marker
        @type  field      : Field
        """
        self._fields[fieldMarker] = [field]

    def set_field_values(self, fieldMarker, fieldValues):
        """
        This method sets all of the values associated with a field.

        @param fieldMarker: field marker to set
        @type  fieldMarker: string
        @param fieldValues: list of field values
        @type  fieldValues: list
        """
        self._fields[fieldMarker] = fieldValues

    def add_field(self, marker, value):
        """
        This method adds a field to an entry if it does not already exist
        and adds a new value to the field of an entry if it does.

        @param marker: field marker
        @type  marker: string
        @param value : field value
        @type  value : string
        """
        if marker in self._fields:
            self._fields[marker].append(value)
        else:
            self._fields[marker] = [value]

    def remove_field(self, fieldMarker):
        """
        This method removes from an entry every field for a given field
        marker. It will not raise an error if the specified field does not
        exist.

        @param fieldMarker: field marker to be deleted
        @type  fieldMarker: string
        """
        if fieldMarker in self._fields:
            del self._fields[fieldMarker]
class Line:
    """This class defines a line of interlinear glossing, such as::

        \\ref 9
        \\t Vigei    avapaviei                           atarisia.
        \\m vigei    ava -pa       -vi        -ei        atari -sia
        \\g 1.PL.INC go  -PROG     -1.PL.INCL -PRES      fish  -PURP
        \\p PRO.PERS V.I -SUFF.V.3 -SUFF.VI.4 -SUFF.VI.5 V.I   -SUFF.V.4
        \\fp Yumi bai go kisim pis.
        \\fe We're going fishing.

    The tiers of a line are saved as a sequential dictionary with
    all of its associated fields. Identified by the field marker \\ref
    by default."""

    # NOTE(review): this file contains more than one definition of a class
    # named Line; the definition that appears last is the one bound to the
    # name when the module is imported.

    def __init__(self, label=None):
        """Constructor that initializes Line object.

        @param label: identifier for the line
        """
        self._fields = SequentialDictionary()
        self._label = label
        # Initialised here so that get_raw_text() is safe to call before
        # set_raw_text() has been used.
        self._rawtext = ""

    def add_field(self, field):
        """Add field to line.

        The value returned by field.get_values() is stored under the marker
        returned by field.get_marker(), replacing anything already stored
        for that marker.

        @param field: object providing get_marker() and get_values()
        """
        fm = field.get_marker()
        fv = field.get_values()
        self._fields[fm] = fv

    def get_field_markers(self):
        """Obtain list of unique fields for the line."""
        return self._fields.keys()

    def get_field_as_string(self, field_marker, join_string=""):
        """
        This method returns a particular field given a field marker.
        Returns a blank string if field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param join_string: string used to join field values (default to
                            blank string)
        @type  join_string: string
        @rtype: string
        """
        try:
            return join_string.join(self._fields[field_marker])
        except KeyError:
            return ""

    def get_field_values_by_field_marker(self, field_marker, sep=None):
        """Obtain all fields for a line, given a field marker.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator used to join the stored values
        @type  sep: string
        @return: the stored value as is when sep is None, the stored values
                 joined with sep otherwise, or None if the marker is not
                 found
        """
        try:
            values = self._fields[field_marker]
        except KeyError:
            return None
        if sep is None:
            return values
        return sep.join(values)

    def get_field_values(self):
        """Obtain list of field values for the line."""
        return self._fields.values()

    def get_label(self):
        """Obtain identifier for line."""
        return self._label

    def get_raw_text(self):
        """Obtain original line of text (blank string if none was set)."""
        return self._rawtext

    def set_label(self, label):
        """Set identifier for line."""
        self._label = label

    def set_raw_text(self, rawtext):
        """Set original line of text."""
        self._rawtext = rawtext

    def get_morphemes(self):
        """Obtain a list of morpheme objects for the line.

        The \\m and \\g tiers are both cut at the indices that get_indices()
        computes from the \\m tier; each pair of slices becomes one Morpheme
        with its form and gloss set.

        @rtype: list of Morpheme objects
        """
        # NOTE(review): the tiers are fetched the same way get_words() does
        # it; if a tier is missing the value is None -- confirm how
        # get_indices() and get_slices_by_indices() handle that.
        morphemeFormField = self.get_field_values_by_field_marker("m")
        morphemeGlossField = self.get_field_values_by_field_marker("g")
        indices = get_indices(morphemeFormField)
        morphemeFormSlices = get_slices_by_indices(morphemeFormField, indices)
        morphemeGlossSlices = get_slices_by_indices(morphemeGlossField, indices)

        morphemes = []
        for i, formSlice in enumerate(morphemeFormSlices):
            m = Morpheme()
            # Padding spaces and the hyphens marking affix boundaries are
            # not part of the form or the gloss.
            m.set_form(formSlice.strip(" ").strip("-"))
            m.set_gloss(morphemeGlossSlices[i].strip(" ").strip("-"))
            morphemes.append(m)
        return morphemes

    def get_words(self, flagParseMorphemes=True):
        """Obtain a list of word objects for the line.

        The \\t, \\m, \\g and \\p tiers are all cut at the indices that
        get_indices() computes from the \\t tier, so that slice number i of
        every tier belongs to word number i.

        @param flagParseMorphemes: if true, each word is also given a list
                                   of Morpheme objects built from its slices
        @type  flagParseMorphemes: boolean
        @rtype: list of Word objects
        """
        words = []

        # Obtain raw field values
        lineWordFormField = self.get_field_values_by_field_marker("t")
        lineMorphemeFormField = self.get_field_values_by_field_marker("m")
        lineMorphemeGlossField = self.get_field_values_by_field_marker("g")
        linePOSField = self.get_field_values_by_field_marker("p")

        # Slice raw field values by the indices of the word-form tier
        wordIndices = get_indices(lineWordFormField)
        lineWordFormSlices = get_slices_by_indices(
            lineWordFormField, wordIndices)
        lineMorphemeFormSlices = get_slices_by_indices(
            lineMorphemeFormField, wordIndices)
        lineMorphemeGlossSlices = get_slices_by_indices(
            lineMorphemeGlossField, wordIndices)
        linePOSSlices = get_slices_by_indices(linePOSField, wordIndices)

        # Go through each slice
        for i, wordForm in enumerate(lineWordFormSlices):
            wordMorphemeForms = lineMorphemeFormSlices[i]
            wordMorphemeGlosses = lineMorphemeGlossSlices[i]
            wordPOS = linePOSSlices[i]

            # Initialize word object and set raw fields
            w = Word()
            w.set_form(wordForm.strip(" ").strip("-"))
            w.set_raw_morphemes(wordMorphemeForms.strip(" ").strip("-"))
            w.set_raw_gloss(wordMorphemeGlosses.strip(" ").strip("-"))
            w.set_part_of_speech(wordPOS.strip(" ").strip("-"))

            # Should the word be inflated with morpheme objects?
            if flagParseMorphemes:
                w.set_morphemes(self._build_morphemes(
                    wordMorphemeForms, wordMorphemeGlosses, wordPOS))

            words.append(w)
        return words

    def _build_morphemes(self, wordMorphemeForms, wordMorphemeGlosses,
                         wordPOS):
        """Build the Morpheme objects for one word from its tier slices.

        All three slices are cut at the indices that get_indices() computes
        from the morpheme-breakdown slice.

        @param wordMorphemeForms: slice of the \\m tier for the word
        @param wordMorphemeGlosses: slice of the \\g tier for the word
        @param wordPOS: slice of the \\p tier for the word
        @rtype: list of Morpheme objects
        """
        morphemeIndices = get_indices(wordMorphemeForms)
        morphemeFormSlices = get_slices_by_indices(
            wordMorphemeForms, morphemeIndices)
        morphemeGlossSlices = get_slices_by_indices(
            wordMorphemeGlosses, morphemeIndices)
        morphemePOSSlices = get_slices_by_indices(wordPOS, morphemeIndices)

        morphemes = []
        for j, formSlice in enumerate(morphemeFormSlices):
            morphemeForm = formSlice.strip(" ")
            morphemeGloss = morphemeGlossSlices[j].strip(" ")
            morphemePOS = morphemePOSSlices[j].strip(" ")

            # Construct morpheme object from slices
            m = Morpheme()
            m.set_form(morphemeForm)
            m.set_gloss(morphemeGloss)
            m.set_part_of_speech(morphemePOS)
            morphemes.append(m)
        return morphemes

    def get_field_value_by_field_marker_and_column(self, field_marker,
                                                   columnIndex):
        """Get values for line, given a field and column index.

        The field is cut at the indices that get_indices() computes from the
        field itself, and the slice for the requested column is returned.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param columnIndex: number of the desired column, counting from 1
        @type  columnIndex: integer
        @return: the slice of the field found in the given column
        """
        fv = self.get_field_values_by_field_marker(field_marker)
        indices = get_indices(fv)
        slices = get_slices_by_indices(fv, indices)
        return slices[columnIndex - 1]
class Entry:
    """
    This class represents an entry (record) from a Shoebox lexicon. Each
    entry consists of a collection of fields, stored as a special type of
    dictionary which keeps track of the sequence in which its keys were
    entered. Each field marker maps to a list of field values.
    """

    # NOTE(review): this file contains more than one definition of a class
    # named Entry; the definition that appears last is the one bound to the
    # name when the module is imported.

    def __init__(self):
        """
        This method constructs a new Entry object.
        """
        self._fields = SequentialDictionary()
        self._rawText = ""
        self._number = None
        self._subentries = None

    def __str__(self):
        """
        This method defines the string representation of an entry.

        Every value of every field is written on its own line as
        a backslash, the field marker, a space and the value.

        @rtype: string
        @return: an entry as a string in Standard Format
        """
        lines = []
        for fm, fvs in self._fields.items():
            for fv in fvs:
                lines.append("\n\\%s %s" % (fm, fv))
        return "".join(lines)

    def set_raw_text(self, rawText):
        """
        This method sets the raw text from which the Entry object was parsed.

        @param rawText: raw Shoebox text from which entry was parsed
        @type  rawText: string
        """
        self._rawText = rawText

    def get_raw_text(self):
        """
        This method provides access to the raw text from which the Entry
        object was parsed.

        @rtype: string
        @return: the raw text (an empty string if none has been set)
        """
        return self._rawText

    def get_subentries(self):
        """
        This method obtains all of the subentries for an entry.

        @rtype: list of Entry objects
        @returns: all of the subentries of an entry, or None if no subentry
                  has been added
        """
        return self._subentries

    def add_subentry(self, subentry):
        """
        This method adds to an entry a subentry, which is simply another
        Entry object.

        @param subentry: subentry
        @type  subentry: Entry object
        """
        # The list of subentries is created lazily on first use.
        if not self._subentries:
            self._subentries = []
        self._subentries.append(subentry)

    def set_number(self, number):
        """
        This method sets the position of the entry in the dictionary as a
        cardinal number.

        @param number: number of entry
        @type  number: integer
        """
        self._number = number

    def get_number(self):
        """
        This method obtains the position of the entry in the dictionary as a
        cardinal number.

        @rtype: integer
        @return: the number of the entry, or None if it has not been set
        """
        return self._number

    def get_fields(self):
        """
        This method obtains the values of all of the fields found in the
        Entry object.

        @rtype: list
        @return: the stored values, one item per field marker (each item is
                 whatever was stored for that marker, normally a list of
                 field values)
        """
        return self._fields.values()

    def get_field_markers(self):
        """
        This method obtains all of the field markers found in the Entry
        object.

        @return: the field markers of an entry
        @rtype: list
        """
        return self._fields.keys()

    def get_values_by_marker(self, field_marker, sep=None):
        """
        This method is a shorter alias for
        get_field_values_by_field_marker().

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator for field values
        @type  sep: string
        @rtype: string (if sep); otherwise, list of field values
        """
        return self.get_field_values_by_field_marker(field_marker, sep)

    def get_field_values_by_field_marker(self, field_marker, sep=None):
        """
        This method returns all of the field values for a given field marker.
        If sep is set, it will return the values joined into a single string;
        otherwise, it will return the list of field values. Returns None if
        the field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator for field values
        @type  sep: string
        @rtype: string (if sep); otherwise, list of field values
        """
        try:
            values = self._fields[field_marker]
        except KeyError:
            return None
        if sep is None:
            return values
        return sep.join(values)

    def get_field_as_string(self, field_marker, join_string=""):
        """
        This method returns a particular field given a field marker.
        Returns a blank string if field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param join_string: string used to join field values (default to
                            blank string)
        @type  join_string: string
        @rtype: string
        """
        try:
            return join_string.join(self._fields[field_marker])
        except KeyError:
            return ""

    def get_field(self, fieldMarker):
        """
        This method returns a particular field given a field marker.
        Returns None if the field is not found.

        @param fieldMarker: marker of desired field
        @type  fieldMarker: string
        @rtype: Field object
        """
        # Only the lookup is guarded, so that an error raised while building
        # the Field object is not mistaken for a missing marker.
        try:
            values = self._fields[fieldMarker]
        except KeyError:
            return None
        return Field(fieldMarker, values)

    def set_field(self, fieldMarker, field):
        """
        This method sets a field, given a marker and its associated data.
        Any values previously stored for the marker are replaced by a
        one-item list holding the given data.

        @param fieldMarker: field marker to set
        @type  fieldMarker: string
        @param field      : data to associate with the field marker
        @type  field      : Field
        """
        self._fields[fieldMarker] = [field]

    def set_field_values(self, fieldMarker, fieldValues):
        """
        This method sets all of the values associated with a field.

        @param fieldMarker: field marker to set
        @type  fieldMarker: string
        @param fieldValues: list of field values
        @type  fieldValues: list
        """
        self._fields[fieldMarker] = fieldValues

    def add_field(self, marker, value):
        """
        This method adds a field to an entry if it does not already exist
        and adds a new value to the field of an entry if it does.

        @param marker: field marker
        @type  marker: string
        @param value : field value
        @type  value : string
        """
        if marker in self._fields:
            self._fields[marker].append(value)
        else:
            self._fields[marker] = [value]

    def remove_field(self, fieldMarker):
        """
        This method removes from an entry every field for a given field
        marker. It will not raise an error if the specified field does not
        exist.

        @param fieldMarker: field marker to be deleted
        @type  fieldMarker: string
        """
        if fieldMarker in self._fields:
            del self._fields[fieldMarker]
class Line:
    """This class defines a line of interlinear glossing, such as::

        \\ref 9
        \\t Vigei    avapaviei                           atarisia.
        \\m vigei    ava -pa       -vi        -ei        atari -sia
        \\g 1.PL.INC go  -PROG     -1.PL.INCL -PRES      fish  -PURP
        \\p PRO.PERS V.I -SUFF.V.3 -SUFF.VI.4 -SUFF.VI.5 V.I   -SUFF.V.4
        \\fp Yumi bai go kisim pis.
        \\fe We're going fishing.

    The tiers of a line are saved as a sequential dictionary with
    all of its associated fields. Identified by the field marker \\ref
    by default."""

    # NOTE(review): this file contains more than one definition of a class
    # named Line; the definition that appears last is the one bound to the
    # name when the module is imported.

    def __init__(self, label=None):
        """Constructor that initializes Line object.

        @param label: identifier for the line
        """
        self._fields = SequentialDictionary()
        self._label = label
        # Initialised here so that get_raw_text() is safe to call before
        # set_raw_text() has been used.
        self._rawtext = ""

    def add_field(self, field):
        """Add field to line.

        The value returned by field.get_values() is stored under the marker
        returned by field.get_marker(), replacing anything already stored
        for that marker.

        @param field: object providing get_marker() and get_values()
        """
        fm = field.get_marker()
        fv = field.get_values()
        self._fields[fm] = fv

    def get_field_markers(self):
        """Obtain list of unique fields for the line."""
        return self._fields.keys()

    def get_field_as_string(self, field_marker, join_string=""):
        """
        This method returns a particular field given a field marker.
        Returns a blank string if field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param join_string: string used to join field values (default to
                            blank string)
        @type  join_string: string
        @rtype: string
        """
        try:
            return join_string.join(self._fields[field_marker])
        except KeyError:
            return ""

    def get_field_values_by_field_marker(self, field_marker, sep=None):
        """Obtain all fields for a line, given a field marker.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator used to join the stored values
        @type  sep: string
        @return: the stored value as is when sep is None, the stored values
                 joined with sep otherwise, or None if the marker is not
                 found
        """
        try:
            values = self._fields[field_marker]
        except KeyError:
            return None
        if sep is None:
            return values
        return sep.join(values)

    def get_field_values(self):
        """Obtain list of field values for the line."""
        return self._fields.values()

    def get_label(self):
        """Obtain identifier for line."""
        return self._label

    def get_raw_text(self):
        """Obtain original line of text (blank string if none was set)."""
        return self._rawtext

    def set_label(self, label):
        """Set identifier for line."""
        self._label = label

    def set_raw_text(self, rawtext):
        """Set original line of text."""
        self._rawtext = rawtext

    def get_morphemes(self):
        """Obtain a list of morpheme objects for the line.

        The \\m and \\g tiers are both cut at the indices that get_indices()
        computes from the \\m tier; each pair of slices becomes one Morpheme
        with its form and gloss set.

        @rtype: list of Morpheme objects
        """
        # NOTE(review): the tiers are fetched the same way get_words() does
        # it; if a tier is missing the value is None -- confirm how
        # get_indices() and get_slices_by_indices() handle that.
        morphemeFormField = self.get_field_values_by_field_marker("m")
        morphemeGlossField = self.get_field_values_by_field_marker("g")
        indices = get_indices(morphemeFormField)
        morphemeFormSlices = get_slices_by_indices(morphemeFormField, indices)
        morphemeGlossSlices = get_slices_by_indices(morphemeGlossField, indices)

        morphemes = []
        for i, formSlice in enumerate(morphemeFormSlices):
            m = Morpheme()
            # Padding spaces and the hyphens marking affix boundaries are
            # not part of the form or the gloss.
            m.set_form(formSlice.strip(" ").strip("-"))
            m.set_gloss(morphemeGlossSlices[i].strip(" ").strip("-"))
            morphemes.append(m)
        return morphemes

    def get_words(self, flagParseMorphemes=True):
        """Obtain a list of word objects for the line.

        The \\t, \\m, \\g and \\p tiers are all cut at the indices that
        get_indices() computes from the \\t tier, so that slice number i of
        every tier belongs to word number i.

        @param flagParseMorphemes: if true, each word is also given a list
                                   of Morpheme objects built from its slices
        @type  flagParseMorphemes: boolean
        @rtype: list of Word objects
        """
        words = []

        # Obtain raw field values
        lineWordFormField = self.get_field_values_by_field_marker("t")
        lineMorphemeFormField = self.get_field_values_by_field_marker("m")
        lineMorphemeGlossField = self.get_field_values_by_field_marker("g")
        linePOSField = self.get_field_values_by_field_marker("p")

        # Slice raw field values by the indices of the word-form tier
        wordIndices = get_indices(lineWordFormField)
        lineWordFormSlices = get_slices_by_indices(
            lineWordFormField, wordIndices)
        lineMorphemeFormSlices = get_slices_by_indices(
            lineMorphemeFormField, wordIndices)
        lineMorphemeGlossSlices = get_slices_by_indices(
            lineMorphemeGlossField, wordIndices)
        linePOSSlices = get_slices_by_indices(linePOSField, wordIndices)

        # Go through each slice
        for i, wordForm in enumerate(lineWordFormSlices):
            wordMorphemeForms = lineMorphemeFormSlices[i]
            wordMorphemeGlosses = lineMorphemeGlossSlices[i]
            wordPOS = linePOSSlices[i]

            # Initialize word object and set raw fields
            w = Word()
            w.set_form(wordForm.strip(" ").strip("-"))
            w.set_raw_morphemes(wordMorphemeForms.strip(" ").strip("-"))
            w.set_raw_gloss(wordMorphemeGlosses.strip(" ").strip("-"))
            w.set_part_of_speech(wordPOS.strip(" ").strip("-"))

            # Should the word be inflated with morpheme objects?
            if flagParseMorphemes:
                w.set_morphemes(self._build_morphemes(
                    wordMorphemeForms, wordMorphemeGlosses, wordPOS))

            words.append(w)
        return words

    def _build_morphemes(self, wordMorphemeForms, wordMorphemeGlosses,
                         wordPOS):
        """Build the Morpheme objects for one word from its tier slices.

        All three slices are cut at the indices that get_indices() computes
        from the morpheme-breakdown slice.

        @param wordMorphemeForms: slice of the \\m tier for the word
        @param wordMorphemeGlosses: slice of the \\g tier for the word
        @param wordPOS: slice of the \\p tier for the word
        @rtype: list of Morpheme objects
        """
        morphemeIndices = get_indices(wordMorphemeForms)
        morphemeFormSlices = get_slices_by_indices(
            wordMorphemeForms, morphemeIndices)
        morphemeGlossSlices = get_slices_by_indices(
            wordMorphemeGlosses, morphemeIndices)
        morphemePOSSlices = get_slices_by_indices(wordPOS, morphemeIndices)

        morphemes = []
        for j, formSlice in enumerate(morphemeFormSlices):
            morphemeForm = formSlice.strip(" ")
            morphemeGloss = morphemeGlossSlices[j].strip(" ")
            morphemePOS = morphemePOSSlices[j].strip(" ")

            # Construct morpheme object from slices
            m = Morpheme()
            m.set_form(morphemeForm)
            m.set_gloss(morphemeGloss)
            m.set_part_of_speech(morphemePOS)
            morphemes.append(m)
        return morphemes

    def get_field_value_by_field_marker_and_column(self, field_marker,
                                                   columnIndex):
        """Get values for line, given a field and column index.

        The field is cut at the indices that get_indices() computes from the
        field itself, and the slice for the requested column is returned.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param columnIndex: number of the desired column, counting from 1
        @type  columnIndex: integer
        @return: the slice of the field found in the given column
        """
        fv = self.get_field_values_by_field_marker(field_marker)
        indices = get_indices(fv)
        slices = get_slices_by_indices(fv, indices)
        return slices[columnIndex - 1]