Example #1
0
    def segment(self, string, readingN, **options):
        """
        Segments a romanisation string into syllables by delegating to the
        reading operator of the given reading.

        Only continuous entities of the romanisation are segmented here.
        Characters that do not belong to the romanisation are not dealt with;
        that is the task of the more general
        :meth:`~cjklib.reading.ReadingFactory.decompose`.

        :type string: str
        :param string: reading string
        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: list of list of str
        :return: all possible segmentations into single syllables (more than
            one if the string is ambiguous)
        :raise DecompositionError: if the given string has an invalid format.
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Not every reading operator implements segmentation.
        if hasattr(operator, 'segment'):
            return operator.segment(string)
        raise UnsupportedError("method 'segment' not supported")
Example #2
0
    def isStrictDecomposition(self, decomposition, readingN, **options):
        """
        Checks if the given decomposition follows the romanisation format
        strictly, so that it can be decomposed unambiguously.

        A romanisation should offer a protocol that makes an unambiguous
        decomposition into its basic syllables possible, so that appending
        syllables to a string is reversible. Compliance with this protocol is
        tested by the reading operator, to which this method delegates. For
        every string there should be one and only one decomposition for which
        the check succeeds.

        :type decomposition: list of str
        :param decomposition: decomposed reading string
        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: bool
        :return: the result of the reading operator's
            ``isStrictDecomposition`` check for the given decomposition
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator implements the strictness check.
        if hasattr(operator, 'isStrictDecomposition'):
            return operator.isStrictDecomposition(decomposition)
        raise UnsupportedError(
            "method 'isStrictDecomposition' not supported")
Example #3
0
    def getDecompositions(self, string, readingN, **options):
        """
        Decomposes the given string into basic entities that can be mapped to
        one Chinese character each and returns all possible decompositions,
        covering the ambiguous cases. This method is a more general version of
        :meth:`~cjklib.reading.ReadingFactory.decompose`.

        Each returned list is built from two entity types: entities of the
        romanisation and other strings.

        :type string: str
        :param string: reading string
        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: list of list of str
        :return: a list of all possible decompositions consisting of basic
            entities.
        :raise DecompositionError: if the given string has a wrong format.
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator can enumerate decompositions.
        if hasattr(operator, 'getDecompositions'):
            return operator.getDecompositions(string)
        raise UnsupportedError("method 'getDecompositions' not supported")
Example #4
0
    def getReadingOperatorClass(self, readingN):
        """
        Looks up the :class:`~cjklib.reading.operator.ReadingOperator` class
        registered for the given reading.

        :type readingN: str
        :param readingN: name of a supported reading
        :rtype: classobj
        :return: a :class:`~cjklib.reading.operator.ReadingOperator` class
        :raise UnsupportedError: if the given reading is not supported.
        """
        # Registry of operator classes, keyed by reading name.
        operatorClasses = self._sharedState['readingOperatorClasses']
        if readingN in operatorClasses:
            return operatorClasses[readingN]
        raise UnsupportedError("reading '%s' not supported" % readingN)
Example #5
0
    def getTones(self, readingN, **options):
        """
        Returns the tones supported by the reading, as reported by its
        reading operator.

        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: list
        :return: list of supported tone marks.
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator exposes tone information.
        if hasattr(operator, 'getTones'):
            return operator.getTones()
        raise UnsupportedError("method 'getTones' not supported")
Example #6
0
    def getFormattingEntities(self, readingN, **options):
        """
        Returns the entities the reading uses to format *reading entities*,
        as reported by its reading operator.

        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: set of str
        :return: set of supported formatting entities
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator knows about formatting entities.
        if hasattr(operator, 'getFormattingEntities'):
            return operator.getFormattingEntities()
        raise UnsupportedError(
            "method 'getFormattingEntities' not supported")
Example #7
0
    def getReadingEntities(self, readingN, **options):
        """
        Returns all entities supported by the reading, as reported by its
        reading operator.

        These entities are used in the segmentation process to find entity
        boundaries.

        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: set of str
        :return: set of supported *reading entities*
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator can list its entities.
        if hasattr(operator, 'getReadingEntities'):
            return operator.getReadingEntities()
        raise UnsupportedError("method 'getReadingEntities' not supported")
Example #8
0
    def getPlainReadingEntities(self, readingN, **options):
        """
        Returns the plain entities supported by the reading, as reported by
        its reading operator. Unlike the result of
        :meth:`~cjklib.reading.ReadingFactory.getReadingEntities`,
        these entities carry no tone mark.

        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: set of str
        :return: set of supported syllables
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator can list plain entities.
        if hasattr(operator, 'getPlainReadingEntities'):
            return operator.getPlainReadingEntities()
        raise UnsupportedError(
            "method 'getPlainReadingEntities' not supported")
Example #9
0
    def getReadingConverterClass(self, fromReading, toReading):
        """
        Looks up the :class:`~cjklib.reading.converter.ReadingConverter` class
        registered for the given pair of source and target reading.

        :type fromReading: str
        :param fromReading: name of the source reading
        :type toReading: str
        :param toReading: name of the target reading
        :rtype: classobj
        :return: a :class:`~cjklib.reading.converter.ReadingConverter` class
        :raise UnsupportedError: if conversion for the given readings is not
            supported.
        """
        if self.isReadingConversionSupported(fromReading, toReading):
            # Converter classes are keyed by (source, target) reading names.
            converterClasses = self._sharedState['readingConverterClasses']
            return converterClasses[(fromReading, toReading)]
        raise UnsupportedError(
            "conversion from '%s' to '%s' not supported"
            % (fromReading, toReading))
Example #10
0
    def splitEntityTone(self, entity, readingN, **options):
        """
        Separates an entity into its plain form (without tone mark) and its
        tone by delegating to the reading operator of the given reading.

        For mixed case strings the letter case of the given entity might not
        be fully conserved.

        :type entity: str
        :param entity: entity with tonal information
        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: tuple
        :return: plain entity without tone mark and entity's tone
        :raise InvalidEntityError: if the entity is invalid.
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator supports tone splitting.
        if hasattr(operator, 'splitEntityTone'):
            return operator.splitEntityTone(entity)
        raise UnsupportedError("method 'splitEntityTone' not supported")
Example #11
0
    def getTonalEntity(self, plainEntity, tone, readingN, **options):
        """
        Builds the entity with tone mark from a plain entity and a tone by
        delegating to the reading operator of the given reading.

        For mixed case strings the letter case of the given plain entity might
        not be fully conserved.

        :type plainEntity: str
        :param plainEntity: entity without tonal information
        :param tone: tone
        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: str
        :return: entity with appropriate tone
        :raise InvalidEntityError: if the entity is invalid.
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator supports applying tones.
        if hasattr(operator, 'getTonalEntity'):
            return operator.getTonalEntity(plainEntity, tone)
        raise UnsupportedError("method 'getTonalEntity' not supported")
Example #12
0
    def isPlainReadingEntity(self, entity, readingN, **options):
        """
        Tells whether the given plain entity (one without any tone mark) is
        recognised by the romanisation operator, i.e. whether it is a valid
        entity of the reading returned by the segmentation method.

        Reading entities will be handled as being case insensitive.

        :type entity: str
        :param entity: entity to check
        :type readingN: str
        :param readingN: name of reading
        :param options: additional options, passed on when obtaining the
            reading operator instance
        :rtype: bool
        :return: ``True`` if string is an entity of the reading, ``False``
            otherwise.
        :raise UnsupportedError: if the given reading is not supported or the
            reading doesn't support the specified method.
        """
        operator = self._getReadingOperatorInstance(readingN, **options)
        # Delegate only if the operator can validate plain entities.
        if hasattr(operator, 'isPlainReadingEntity'):
            return operator.isPlainReadingEntity(entity)
        raise UnsupportedError(
            "method 'isPlainReadingEntity' not supported")