Example #1
0
class Entry:
  """
  This class represents an entry (record) from a Shoebox lexicon. Each entry
  consists of a collection of fields, stored as a special type of dictionary
  which keeps track of the sequence in which its keys were entered. Each
  field marker maps to the list of values recorded for that marker.
  """

  def __init__(self):
    """
    This method constructs a new, empty Entry object.
    """
    self._fields     = SequentialDictionary()
    self._rawText    = ""
    self._number     = None
    # Stays None until the first subentry is added (see add_subentry).
    self._subentries = None

  def __str__(self):
    """
    This method defines the string representation of an entry: one
    "\\marker value" line per field value, each preceded by a newline.

    @rtype:  string
    @return: an entry as a string in Standard Format
    """
    lines = []
    for fm, fvs in self._fields.items():
      for fv in fvs:
        lines.append("\n\\%s %s" % (fm, fv))
    # Join once instead of growing a string inside the loop.
    return "".join(lines)

  def set_raw_text(self, rawText):
    """
    This method sets the raw text from which the Entry object was parsed.

    @param rawText: raw Shoebox text from which entry was parsed
    @type  rawText: string
    """
    self._rawText = rawText

  def get_raw_text(self):
    """
    This method provides access to the raw text from which the
    Entry object was parsed.

    @rtype: string
    @return: the raw text (a blank string if it was never set)
    """
    return self._rawText

  def get_subentries(self):
    """
    This method obtains all of the subentries for an entry.

    @rtype: list of Entry objects
    @returns: all of the subentries of an entry, or None if no
              subentry has been added yet
    """
    return self._subentries

  def add_subentry(self, subentry):
    """
    This method adds to an entry a subentry, which is simply another
    Entry object.

    @param subentry: subentry
    @type  subentry: Entry object
    """
    # The list is created lazily on first use.
    if not self._subentries:
      self._subentries = []
    self._subentries.append(subentry)

  def set_number(self, number):
    """
    This method sets the position of the entry in
    the dictionary as a cardinal number.

    @param number: number of entry
    @type  number: integer
    """
    self._number = number

  def get_number(self):
    """
    This method obtains the position of the entry in the dictionary
    as a cardinal number.

    @rtype: integer
    @return: the entry number, or None if it was never set
    """
    return self._number

  def get_fields(self):
    """
    This method obtains the values of all of the fields found in the
    Entry object, i.e. the values() of the underlying dictionary (one
    collection of field values per field marker).

    @rtype: list
    """
    return self._fields.values()

  def get_field_markers(self):
    """
    This method obtains all of the field markers found in the Entry object.

    @return: the field markers of an entry
    @rtype: list
    """
    return self._fields.keys()

  def get_values_by_marker(self, field_marker, sep=None):
    """
    Shorter alias for get_field_values_by_field_marker; takes the same
    arguments and returns the same result.
    """
    return self.get_field_values_by_field_marker(field_marker, sep)

  def get_field_values_by_field_marker(self, field_marker, sep=None):
    """
    This method returns all of the field values for a given field marker.
    If C{sep} is set, it will return the values joined into a single
    string; otherwise, it will return the stored list of values.

    @param field_marker: marker of desired field
    @type  field_marker: string
    @param sep: separator for field values
    @type  sep: string
    @rtype: string (if sep); otherwise, list of field values
    @return: the field values, or None if the marker is not present
    """
    try:
      values = self._fields[field_marker]
    except KeyError:
      return None
    if sep is None:
      return values
    return sep.join(values)

  def get_field_as_string(self,
                          field_marker,
                          join_string=""):
    """
    This method returns the values of a particular field, given a field
    marker, joined into one string.
    Returns a blank string if field is not found.

    @param field_marker: marker of desired field
    @type  field_marker: string
    @param join_string: string used to join field values (default to blank string)
    @type  join_string: string
    @rtype: string
    """
    try:
      return join_string.join(self._fields[field_marker])
    except KeyError:
      return ""

  def get_field(self, fieldMarker):
    """
    This method returns a particular field given a field marker.

    @param fieldMarker: marker of desired field
    @type  fieldMarker: string
    @rtype: Field object
    @return: a Field built from the marker and its stored values, or
             None if the marker is not present
    """
    try:
      return Field(fieldMarker, self._fields[fieldMarker])
    except KeyError:
      return None

  def set_field(self, fieldMarker, field):
    """
    This method sets a field, given a marker and its associated data.
    Any values previously stored under the marker are replaced.

    @param fieldMarker: field marker to set
    @type  fieldMarker: string
    @param field      : data to store as the single value of the field
    """
    # Values are always kept in a list, even when there is only one.
    self._fields[fieldMarker] = [field]

  def set_field_values(self, fieldMarker, fieldValues):
    """
    This method sets all of the values associated with a field.

    @param fieldMarker: field marker to set
    @type  fieldMarker: string
    @param fieldValues: list of field values
    @type  fieldValues: list
    """
    self._fields[fieldMarker] = fieldValues

  def add_field(self, marker, value):
    """
    This method adds a field to an entry if it does not already exist
    and adds a new value to the field of an entry if it does.

    @param marker: field marker
    @type  marker: string
    @param value : field value
    @type  value : string
    """
    # KeyError-based lookup, as in the getters above, instead of has_key
    # (which mappings no longer provide on Python 3).
    try:
      fvs = self._fields[marker]
    except KeyError:
      fvs = []
    fvs.append(value)
    self._fields[marker] = fvs

  def remove_field(self, fieldMarker):
    """
    This method removes from an entry every field for a given
    field marker. It will not raise an error if the specified field
    does not exist.

    @param fieldMarker: field marker to be deleted
    @type  fieldMarker: string
    """
    try:
      del self._fields[fieldMarker]
    except KeyError:
      # Deliberately ignored: removing an absent field is a no-op.
      pass
Example #2
0
class Line:
    """This class defines a line of interlinear glossing, such as::

        \\ref 9
        \\t Vigei    avapaviei                           atarisia.
        \\m vigei    ava -pa       -vi        -ei        atari -sia
        \\g 1.PL.INC go  -PROG     -1.PL.INCL -PRES      fish  -PURP
        \\p PRO.PERS V.I -SUFF.V.3 -SUFF.VI.4 -SUFF.VI.5 V.I   -SUFF.V.4
        \\fp Yumi bai go kisim pis.
        \\fe We're going fishing.

    The tiers of a line are saved as a sequential dictionary with
    all of its associated fields. Identified by the field marker \\ref
    by default."""

    def __init__(self, label=None):
        """Constructor that initializes Line object.

        @param label: identifier for the line (defaults to None)
        """
        self._fields = SequentialDictionary()
        self._label = label
        # Initialised here so get_raw_text() is safe to call before
        # set_raw_text() has been used.
        self._rawtext = ""

    def add_field(self, field):
        """Add field to line.

        The field's marker becomes the key and its values the stored
        data; a field with the same marker is overwritten.

        @param field: object providing get_marker() and get_values()
        """
        fm = field.get_marker()
        fv = field.get_values()
        self._fields[fm] = fv

    def get_field_markers(self):
        """Obtain list of unique fields for the line."""
        return self._fields.keys()

    def get_field_as_string(self, field_marker, join_string=""):
        """
        This method returns the values of a particular field, given a
        field marker, joined into one string.
        Returns a blank string if field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param join_string: string used to join field values (default to blank string)
        @type  join_string: string
        @rtype: string
        """
        try:
            return join_string.join(self._fields[field_marker])
        except KeyError:
            return ""

    def get_field_values_by_field_marker(self, field_marker, sep=None):
        """Obtain all field values for a line, given a field marker.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator used to join the values; when None the
                    stored values are returned as they are
        @type  sep: string
        @return: the values (joined if sep is given), or None if the
                 marker is not present
        """
        try:
            values = self._fields[field_marker]
        except KeyError:
            return None
        if sep is None:
            return values
        return sep.join(values)

    def get_field_values(self):
        """Obtain list of field values for the line."""
        return self._fields.values()

    def get_label(self):
        """Obtain identifier for line."""
        return self._label

    def get_raw_text(self):
        """Obtain original line of text (blank string if never set)."""
        return self._rawtext

    def set_label(self, label):
        """Set identifier for line."""
        self._label = label

    def set_raw_text(self, rawtext):
        """Set original line of text."""
        self._rawtext = rawtext

    def get_morphemes(self):
        """Obtain a list of morpheme objects for the line.

        The slice indices are computed from the "m" tier and applied to
        both the "m" (form) and "g" (gloss) tiers; each slice has
        surrounding spaces and then hyphens stripped.

        @rtype: list of Morpheme objects
        """
        # NOTE(review): get_indices / get_slices_by_indices are defined
        # elsewhere; this mirrors how get_words() feeds them -- confirm
        # they accept the value returned here.
        morphemeFormField = self.get_field_values_by_field_marker("m")
        morphemeGlossField = self.get_field_values_by_field_marker("g")
        indices = get_indices(morphemeFormField)
        morphemeFormSlices = get_slices_by_indices(morphemeFormField, indices)
        morphemeGlossSlices = get_slices_by_indices(morphemeGlossField,
                                                    indices)
        morphemes = []
        for i, formSlice in enumerate(morphemeFormSlices):
            m = Morpheme()
            m.set_form(formSlice.strip(" ").strip("-"))
            m.set_gloss(morphemeGlossSlices[i].strip(" ").strip("-"))
            morphemes.append(m)
        return morphemes

    def _parse_morphemes(self, wordMorphemeForms, wordMorphemeGlosses,
                         wordPOS):
        """Build one Morpheme object per morpheme of a single word.

        Indices come from the word's morpheme-form slice and are applied
        to the form, gloss and part-of-speech slices alike.
        """
        morphemeIndices = get_indices(wordMorphemeForms)
        morphemeFormSlices = get_slices_by_indices(
            wordMorphemeForms, morphemeIndices)
        morphemeGlossSlices = get_slices_by_indices(
            wordMorphemeGlosses, morphemeIndices)
        morphemePOSSlices = get_slices_by_indices(
            wordPOS, morphemeIndices)

        morphemes = []
        for j, formSlice in enumerate(morphemeFormSlices):
            morphemeForm = formSlice.strip(" ")
            morphemeGloss = morphemeGlossSlices[j].strip(" ")
            morphemePOS = morphemePOSSlices[j].strip(" ")

            # Construct morpheme object from slices
            m = Morpheme()
            m.set_form(morphemeForm)
            m.set_gloss(morphemeGloss)
            m.set_part_of_speech(morphemePOS)
            morphemes.append(m)
        return morphemes

    def get_words(self, flagParseMorphemes=True):
        """Obtain a list of word objects for the line.

        Word boundaries are taken from the "t" tier and applied to the
        "t", "m", "g" and "p" tiers.

        @param flagParseMorphemes: when true, each word is also given a
                                   list of Morpheme objects
        @type  flagParseMorphemes: boolean
        @rtype: list of Word objects
        """
        words = []

        # Obtain raw field values
        lineWordFormField = self.get_field_values_by_field_marker("t")
        lineMorphemeFormField = self.get_field_values_by_field_marker("m")
        lineMorphemeGlossField = self.get_field_values_by_field_marker("g")
        linePOSField = self.get_field_values_by_field_marker("p")

        wordIndices = get_indices(lineWordFormField)

        # Slice raw field values by indices
        lineWordFormSlices = get_slices_by_indices(lineWordFormField,
                                                   wordIndices)
        lineMorphemeFormSlices = get_slices_by_indices(lineMorphemeFormField,
                                                       wordIndices)
        lineMorphemeGlossSlices = get_slices_by_indices(
            lineMorphemeGlossField, wordIndices)
        linePOSSlices = get_slices_by_indices(linePOSField, wordIndices)

        # Go through each slice
        for i, wordForm in enumerate(lineWordFormSlices):
            wordMorphemeForms = lineMorphemeFormSlices[i]
            wordMorphemeGlosses = lineMorphemeGlossSlices[i]
            wordPOS = linePOSSlices[i]

            # Initialize word object and set raw fields
            w = Word()
            w.set_form(wordForm.strip(" ").strip("-"))
            w.set_raw_morphemes(wordMorphemeForms.strip(" ").strip("-"))
            w.set_raw_gloss(wordMorphemeGlosses.strip(" ").strip("-"))
            w.set_part_of_speech(wordPOS.strip(" ").strip("-"))

            # Should the word be inflated with morpheme objects?
            if flagParseMorphemes:
                w.set_morphemes(self._parse_morphemes(
                    wordMorphemeForms, wordMorphemeGlosses, wordPOS))

            words.append(w)
        return words

    def get_field_value_by_field_marker_and_column(self, field_marker,
                                                   columnIndex):
        """Get values for line, given a field and column index.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param columnIndex: column to return; columnIndex - 1 is used as
                            the list index, so columns count from 1
        @type  columnIndex: integer
        @return: the slice of the field at the requested column
        """
        # NOTE(review): get_indices / get_slices_by_indices are defined
        # elsewhere -- confirm they accept the value returned here.
        fv = self.get_field_values_by_field_marker(field_marker)
        indices = get_indices(fv)
        slices = get_slices_by_indices(fv, indices)
        return slices[columnIndex - 1]
Example #3
0
class Entry:
    """
    This class represents an entry (record) from a Shoebox lexicon. Each entry
    consists of a collection of fields, stored as a special type of dictionary
    which keeps track of the sequence in which its keys were entered. Each
    field marker maps to the list of values recorded for that marker.
    """

    def __init__(self):
        """
        This method constructs a new, empty Entry object.
        """
        self._fields = SequentialDictionary()
        self._rawText = ""
        self._number = None
        # Stays None until the first subentry is added (see add_subentry).
        self._subentries = None

    def __str__(self):
        """
        This method defines the string representation of an entry: one
        "\\marker value" line per field value, each preceded by a newline.

        @rtype:  string
        @return: an entry as a string in Standard Format
        """
        lines = []
        for fm, fvs in self._fields.items():
            for fv in fvs:
                lines.append("\n\\%s %s" % (fm, fv))
        # Join once instead of growing a string inside the loop.
        return "".join(lines)

    def set_raw_text(self, rawText):
        """
        This method sets the raw text from which the Entry object was parsed.

        @param rawText: raw Shoebox text from which entry was parsed
        @type  rawText: string
        """
        self._rawText = rawText

    def get_raw_text(self):
        """
        This method provides access to the raw text from which the
        Entry object was parsed.

        @rtype: string
        @return: the raw text (a blank string if it was never set)
        """
        return self._rawText

    def get_subentries(self):
        """
        This method obtains all of the subentries for an entry.

        @rtype: list of Entry objects
        @returns: all of the subentries of an entry, or None if no
                  subentry has been added yet
        """
        return self._subentries

    def add_subentry(self, subentry):
        """
        This method adds to an entry a subentry, which is simply another
        Entry object.

        @param subentry: subentry
        @type  subentry: Entry object
        """
        # The list is created lazily on first use.
        if not self._subentries:
            self._subentries = []
        self._subentries.append(subentry)

    def set_number(self, number):
        """
        This method sets the position of the entry in
        the dictionary as a cardinal number.

        @param number: number of entry
        @type  number: integer
        """
        self._number = number

    def get_number(self):
        """
        This method obtains the position of the entry in the dictionary
        as a cardinal number.

        @rtype: integer
        @return: the entry number, or None if it was never set
        """
        return self._number

    def get_fields(self):
        """
        This method obtains the values of all of the fields found in the
        Entry object, i.e. the values() of the underlying dictionary (one
        collection of field values per field marker).

        @rtype: list
        """
        return self._fields.values()

    def get_field_markers(self):
        """
        This method obtains all of the field markers found in the Entry
        object.

        @return: the field markers of an entry
        @rtype: list
        """
        return self._fields.keys()

    def get_values_by_marker(self, field_marker, sep=None):
        """
        Shorter alias for get_field_values_by_field_marker; takes the same
        arguments and returns the same result.
        """
        return self.get_field_values_by_field_marker(field_marker, sep)

    def get_field_values_by_field_marker(self, field_marker, sep=None):
        """
        This method returns all of the field values for a given field marker.
        If C{sep} is set, it will return the values joined into a single
        string; otherwise, it will return the stored list of values.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator for field values
        @type  sep: string
        @rtype: string (if sep); otherwise, list of field values
        @return: the field values, or None if the marker is not present
        """
        try:
            values = self._fields[field_marker]
        except KeyError:
            return None
        if sep is None:
            return values
        return sep.join(values)

    def get_field_as_string(self, field_marker, join_string=""):
        """
        This method returns the values of a particular field, given a field
        marker, joined into one string.
        Returns a blank string if field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param join_string: string used to join field values (default to blank string)
        @type  join_string: string
        @rtype: string
        """
        try:
            return join_string.join(self._fields[field_marker])
        except KeyError:
            return ""

    def get_field(self, fieldMarker):
        """
        This method returns a particular field given a field marker.

        @param fieldMarker: marker of desired field
        @type  fieldMarker: string
        @rtype: Field object
        @return: a Field built from the marker and its stored values, or
                 None if the marker is not present
        """
        try:
            return Field(fieldMarker, self._fields[fieldMarker])
        except KeyError:
            return None

    def set_field(self, fieldMarker, field):
        """
        This method sets a field, given a marker and its associated data.
        Any values previously stored under the marker are replaced.

        @param fieldMarker: field marker to set
        @type  fieldMarker: string
        @param field      : data to store as the single value of the field
        """
        # Values are always kept in a list, even when there is only one.
        self._fields[fieldMarker] = [field]

    def set_field_values(self, fieldMarker, fieldValues):
        """
        This method sets all of the values associated with a field.

        @param fieldMarker: field marker to set
        @type  fieldMarker: string
        @param fieldValues: list of field values
        @type  fieldValues: list
        """
        self._fields[fieldMarker] = fieldValues

    def add_field(self, marker, value):
        """
        This method adds a field to an entry if it does not already exist
        and adds a new value to the field of an entry if it does.

        @param marker: field marker
        @type  marker: string
        @param value : field value
        @type  value : string
        """
        # KeyError-based lookup, as in the getters above, instead of
        # has_key (which mappings no longer provide on Python 3).
        try:
            fvs = self._fields[marker]
        except KeyError:
            fvs = []
        fvs.append(value)
        self._fields[marker] = fvs

    def remove_field(self, fieldMarker):
        """
        This method removes from an entry every field for a given
        field marker. It will not raise an error if the specified field
        does not exist.

        @param fieldMarker: field marker to be deleted
        @type  fieldMarker: string
        """
        try:
            del self._fields[fieldMarker]
        except KeyError:
            # Deliberately ignored: removing an absent field is a no-op.
            pass
Example #4
0
class Line:
    """This class defines a line of interlinear glossing, such as::

        \\ref 9
        \\t Vigei    avapaviei                           atarisia.
        \\m vigei    ava -pa       -vi        -ei        atari -sia
        \\g 1.PL.INC go  -PROG     -1.PL.INCL -PRES      fish  -PURP
        \\p PRO.PERS V.I -SUFF.V.3 -SUFF.VI.4 -SUFF.VI.5 V.I   -SUFF.V.4
        \\fp Yumi bai go kisim pis.
        \\fe We're going fishing.

    The tiers of a line are saved as a sequential dictionary with
    all of its associated fields. Identified by the field marker \\ref
    by default."""

    def __init__(self,
                 label=None):
        """Constructor that initializes Line object.

        @param label: identifier for the line (defaults to None)
        """
        self._fields = SequentialDictionary()
        self._label = label
        # Initialised here so get_raw_text() is safe to call before
        # set_raw_text() has been used.
        self._rawtext = ""

    def add_field(self, field):
        """Add field to line.

        The field's marker becomes the key and its values the stored
        data; a field with the same marker is overwritten.

        @param field: object providing get_marker() and get_values()
        """
        fm = field.get_marker()
        fv = field.get_values()
        self._fields[fm] = fv

    def get_field_markers(self):
        """Obtain list of unique fields for the line."""
        return self._fields.keys()

    def get_field_as_string(self,
                            field_marker,
                            join_string=""):
        """
        This method returns the values of a particular field, given a
        field marker, joined into one string.
        Returns a blank string if field is not found.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param join_string: string used to join field values (default to blank string)
        @type  join_string: string
        @rtype: string
        """
        try:
            return join_string.join(self._fields[field_marker])
        except KeyError:
            return ""

    def get_field_values_by_field_marker(self, field_marker, sep=None):
        """Obtain all field values for a line, given a field marker.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param sep: separator used to join the values; when None the
                    stored values are returned as they are
        @type  sep: string
        @return: the values (joined if sep is given), or None if the
                 marker is not present
        """
        try:
            values = self._fields[field_marker]
        except KeyError:
            return None
        if sep is None:
            return values
        return sep.join(values)

    def get_field_values(self):
        """Obtain list of field values for the line."""
        return self._fields.values()

    def get_label(self):
        """Obtain identifier for line."""
        return self._label

    def get_raw_text(self):
        """Obtain original line of text (blank string if never set)."""
        return self._rawtext

    def set_label(self, label):
        """Set identifier for line."""
        self._label = label

    def set_raw_text(self, rawtext):
        """Set original line of text."""
        self._rawtext = rawtext

    def get_morphemes(self):
        """Obtain a list of morpheme objects for the line.

        The slice indices are computed from the "m" tier and applied to
        both the "m" (form) and "g" (gloss) tiers; each slice has
        surrounding spaces and then hyphens stripped.

        @rtype: list of Morpheme objects
        """
        # NOTE(review): get_indices / get_slices_by_indices are defined
        # elsewhere; this mirrors how get_words() feeds them -- confirm
        # they accept the value returned here.
        morphemeFormField = self.get_field_values_by_field_marker("m")
        morphemeGlossField = self.get_field_values_by_field_marker("g")
        indices = get_indices(morphemeFormField)
        morphemeFormSlices = get_slices_by_indices(morphemeFormField, indices)
        morphemeGlossSlices = get_slices_by_indices(morphemeGlossField, indices)
        morphemes = []
        for i, formSlice in enumerate(morphemeFormSlices):
            m = Morpheme()
            m.set_form(formSlice.strip(" ").strip("-"))
            m.set_gloss(morphemeGlossSlices[i].strip(" ").strip("-"))
            morphemes.append(m)
        return morphemes

    def _parse_morphemes(self, wordMorphemeForms, wordMorphemeGlosses, wordPOS):
        """Build one Morpheme object per morpheme of a single word.

        Indices come from the word's morpheme-form slice and are applied
        to the form, gloss and part-of-speech slices alike.
        """
        morphemeIndices     = get_indices(wordMorphemeForms)
        morphemeFormSlices  = get_slices_by_indices(wordMorphemeForms,   morphemeIndices)
        morphemeGlossSlices = get_slices_by_indices(wordMorphemeGlosses, morphemeIndices)
        morphemePOSSlices   = get_slices_by_indices(wordPOS,             morphemeIndices)

        morphemes = []
        for j, formSlice in enumerate(morphemeFormSlices):
            morphemeForm  = formSlice.strip(" ")
            morphemeGloss = morphemeGlossSlices[j].strip(" ")
            morphemePOS   = morphemePOSSlices[j].strip(" ")

            # Construct morpheme object from slices
            m = Morpheme()
            m.set_form(morphemeForm)
            m.set_gloss(morphemeGloss)
            m.set_part_of_speech(morphemePOS)
            morphemes.append(m)
        return morphemes

    def get_words(self, flagParseMorphemes=True):
        """Obtain a list of word objects for the line.

        Word boundaries are taken from the "t" tier and applied to the
        "t", "m", "g" and "p" tiers.

        @param flagParseMorphemes: when true, each word is also given a
                                   list of Morpheme objects
        @type  flagParseMorphemes: boolean
        @rtype: list of Word objects
        """
        words = []

        # Obtain raw field values
        lineWordFormField      = self.get_field_values_by_field_marker("t")
        lineMorphemeFormField  = self.get_field_values_by_field_marker("m")
        lineMorphemeGlossField = self.get_field_values_by_field_marker("g")
        linePOSField           = self.get_field_values_by_field_marker("p")

        wordIndices = get_indices(lineWordFormField)

        # Slice raw field values by indices
        lineWordFormSlices      = get_slices_by_indices(lineWordFormField,      wordIndices)
        lineMorphemeFormSlices  = get_slices_by_indices(lineMorphemeFormField,  wordIndices)
        lineMorphemeGlossSlices = get_slices_by_indices(lineMorphemeGlossField, wordIndices)
        linePOSSlices           = get_slices_by_indices(linePOSField,           wordIndices)

        # Go through each slice
        for i, wordForm in enumerate(lineWordFormSlices):
            wordMorphemeForms   = lineMorphemeFormSlices[i]
            wordMorphemeGlosses = lineMorphemeGlossSlices[i]
            wordPOS             = linePOSSlices[i]

            # Initialize word object and set raw fields
            w = Word()
            w.set_form(wordForm.strip(" ").strip("-"))
            w.set_raw_morphemes(wordMorphemeForms.strip(" ").strip("-"))
            w.set_raw_gloss(wordMorphemeGlosses.strip(" ").strip("-"))
            w.set_part_of_speech(wordPOS.strip(" ").strip("-"))

            # Should the word be inflated with morpheme objects?
            if flagParseMorphemes:
                w.set_morphemes(self._parse_morphemes(
                    wordMorphemeForms, wordMorphemeGlosses, wordPOS))

            words.append(w)
        return words

    def get_field_value_by_field_marker_and_column(self, field_marker, columnIndex):
        """Get values for line, given a field and column index.

        @param field_marker: marker of desired field
        @type  field_marker: string
        @param columnIndex: column to return; columnIndex - 1 is used as
                            the list index, so columns count from 1
        @type  columnIndex: integer
        @return: the slice of the field at the requested column
        """
        # NOTE(review): get_indices / get_slices_by_indices are defined
        # elsewhere -- confirm they accept the value returned here.
        fv = self.get_field_values_by_field_marker(field_marker)
        indices = get_indices(fv)
        slices = get_slices_by_indices(fv, indices)
        return slices[columnIndex-1]