Exemple #1
0
class CalimaStarReinflector(object):
    """CALIMA Star reinflector component.

    Arguments:
        db (:obj:`~camel_tools.calima_star.database.CalimaStarDB`): Database to
            use for generation. Must be opened in reinflection mode or both
            analysis and generation modes.

    Raises:
        :obj:`~camel_tools.calima_star.errors.ReinflectorError`: If **db** is
            not an instance of
            :obj:`~camel_tools.calima_star.database.CalimaStarDB` or if **db**
            does not support reinflection.
    """
    def __init__(self, db):
        if not isinstance(db, CalimaStarDB):
            raise ReinflectorError('DB is not an instance of CalimaStarDB')
        if not db.flags.generation:
            raise ReinflectorError('DB does not support reinflection')

        self._db = db

        # Reinflection is analysis followed by generation, so both components
        # are built on top of the same database.
        self._analyzer = CalimaStarAnalyzer(db)
        self._generator = CalimaStarGenerator(db)

    def _validate_feats(self, feats):
        """Check the requested features against the database definitions.

        Raises:
            :obj:`~camel_tools.calima_star.errors.InvalidReinflectorFeature`:
                If a feature is given that is not defined in database.
            :obj:`~camel_tools.calima_star.errors.InvalidReinflectorFeatureValue`:
                If an invalid value is given to a feature.
        """
        defines = self._db.defines

        for feat in feats:
            if feat not in defines:
                raise InvalidReinflectorFeature(feat)

            allowed = defines[feat]
            if allowed is None:
                # The database lists no values for this feature, so there is
                # nothing to check the requested value against.
                continue

            value = feats[feat]
            if feat in _ANY_FEATS and value == 'ANY':
                # 'ANY' is a wildcard accepted only for features in _ANY_FEATS.
                continue
            if value not in allowed:
                raise InvalidReinflectorFeatureValue(feat, value)

    def _build_generate_feats(self, analysis, feats, has_clitics):
        """Merge one analysis with the requested features.

        Returns:
            :obj:`dict` or :obj:`None`: Features to pass to the generator, or
            None when a requested feature is not applicable ('na') to this
            analysis and the analysis must therefore be skipped.
        """
        generate_feats = {}

        for feat, analyzed in analysis.items():
            if feat in _IGNORED_FEATS:
                continue
            if feat in _SPECIFIED_FEATS and feat not in feats:
                # These features are only used when explicitly requested.
                continue
            if has_clitics and feat in _CLITIC_IGNORED_FEATS:
                continue

            if feat not in feats:
                # Not overridden by the caller: carry over the analyzed value
                # unless it is marked as not applicable.
                if analyzed != 'na':
                    generate_feats[feat] = analyzed
                continue

            requested = feats[feat]
            if requested == 'ANY':
                # Wildcard: leave the feature unconstrained for generation.
                continue
            if analyzed == 'na':
                # The caller asks for a feature this analysis cannot carry.
                return None
            generate_feats[feat] = requested

        return generate_feats

    def reinflect(self, word, feats):
        """Generate analyses for a given word from a given set of inflectional
        features.

        The requested features are validated first, so invalid input is
        reported even when the word itself has no analyses.

        Arguments:
            word (:obj:`str`): Word to reinflect.
            feats (:obj:`dict`): Dictionary of features.
                See :doc:`/reference/calima_star_features` for more information
                on features and their values.

        Returns:
            :obj:`list` of :obj:`dict`: List of generated analyses.
            See :doc:`/reference/calima_star_features` for more information on
            features and their values.

        Raises:
            :obj:`~camel_tools.calima_star.errors.InvalidReinflectorFeature`:
                If a feature is given that is not defined in database.
            :obj:`~camel_tools.calima_star.errors.InvalidReinflectorFeatureValue`:
                If an invalid value is given to a feature.
        """

        self._validate_feats(feats)

        analyses = self._analyzer.analyze(word)
        if not analyses:
            return []

        has_clitics = any(feat in feats for feat in _CLITIC_FEATS)

        # The input word does not change between analyses, so strip its
        # diacritics once instead of once per analysis.
        dediac_word = dediac_ar(word)

        results = []

        for analysis in analyses:
            # Only keep analyses whose surface form matches the input word
            # once diacritics are removed.
            if dediac_ar(analysis['diac']) != dediac_word:
                continue

            if 'pos' in feats and feats['pos'] != analysis['pos']:
                continue

            lemma = _LEMMA_SPLIT_RE.split(analysis['lex'])[0]

            if 'lex' in feats and feats['lex'] != lemma:
                continue

            generate_feats = self._build_generate_feats(analysis, feats,
                                                        has_clitics)
            if generate_feats is None:
                continue

            generated = self._generator.generate(lemma, generate_feats)
            if generated is not None:
                results.extend(generated)

        return results
Exemple #2
0
def _generate(db, fin, fout, backoff):
    """Read generation requests from **fin** and write results to **fout**.

    Each non-blank input line is parsed with ``_parse_generator_line`` into a
    lemma and a feature dictionary, passed to a generator built on **db**, and
    the serialized analyses are written to **fout** followed by a blank line.
    Problems with an individual line are reported on stderr and processing
    continues with the next line.

    Arguments:
        db: Database used to build the generator (and the reinflector when
            **backoff** is ``'REINFLECT'``).
        fin: Input stream read line by line. When it is ``sys.stdin`` the
            error messages omit the line number.
        fout: Output stream the serialized analyses are written to.
        backoff: Backoff mode. When equal to ``'REINFLECT'``, a lemma for
            which the generator produces nothing is reinflected instead.
    """
    generator = CalimaStarGenerator(db)
    reinflector = CalimaStarReinflector(db) if backoff == 'REINFLECT' else None

    from_stdin = fin is sys.stdin
    line_num = 0

    while True:
        # Every read goes through force_unicode so the loop body always works
        # on the same string type, whichever line is being processed.
        line = force_unicode(fin.readline())
        if not line:
            # readline() returns an empty string only at end of input.
            break
        line_num += 1

        line = line.strip()
        if not line:
            continue

        parsed = _parse_generator_line(line)

        if parsed is None:
            if from_stdin:
                sys.stderr.write('Error: Invalid input line.\n')
            else:
                sys.stderr.write(
                    'Error: Invalid input line ({}).\n'.format(line_num))
            continue

        lemma = parsed[0]
        feats = parsed[1]

        # Make sure lemma and pos are specified first
        if lemma is None:
            if from_stdin:
                sys.stderr.write('Error: Missing lex/lemma feature.\n')
            else:
                sys.stderr.write(
                    'Error: Missing lex/lemma feature. [{}].\n'.format(
                        line_num))
            continue

        if 'pos' not in feats:
            if from_stdin:
                sys.stderr.write('Error: Missing pos feature.\n')
            else:
                sys.stderr.write(
                    'Error: Missing pos feature. [{}]\n'.format(line_num))
            continue

        # NOTE(review): only GeneratorError is handled below; whether errors
        # raised by the reinflector derive from it is not visible here.
        try:
            analyses = generator.generate(lemma, feats)

            # Truthiness covers both an empty result and None.
            if not analyses and reinflector is not None:
                word = _dediac(lemma)
                analyses = reinflector.reinflect(word, feats)

            serialized = _serialize_analyses(fout, lemma, analyses,
                                             db.order, True)

            if six.PY3:
                fout.write(serialized)
            else:
                fout.write(force_encoding(serialized))

            fout.write('\n\n')
        except GeneratorError as error:
            if from_stdin:
                sys.stderr.write('Error: {}.\n'.format(error.msg))
            else:
                sys.stderr.write('Error: {}. [{}]\n'.format(
                    error.msg, line_num))