Exemples Python de Tag — exemples d'utilisation de maru.tag.Tag en Python

Exemple #1

0

Afficher le fichier

def test():
    """Run LinearTagger on 'чёрное зеркало' and compare against the expected tags.

    The expected result pairs word index 0 with a neuter singular nominative
    adjective tag and index 1 with a neuter singular nominative inanimate
    noun tag; the comparison itself is delegated to assert_tags_equal.
    """
    linear_tagger = LinearTagger()
    adjective = Tag(
        pos=PartOfSpeech.ADJECTIVE,
        case=Case.NOMINATIVE,
        degree=Degree.POSITIVE,
        gender=Gender.NEUTER,
        number=Number.SINGULAR,
        variant=Variant.FULL,
    )
    noun = Tag(
        pos=PartOfSpeech.NOUN,
        animacy=Animacy.INANIMATE,
        case=Case.NOMINATIVE,
        gender=Gender.NEUTER,
        number=Number.SINGULAR,
    )
    assert_tags_equal(
        tagger=linear_tagger,
        expected=[(0, adjective), (1, noun)],
        words=['чёрное', 'зеркало'],
    )

Exemple #2

0

Afficher le fichier

def test():
    """Run CRFTagger on 'настоящий детектив' and compare against the expected tags.

    The expected result pairs word index 0 with a masculine singular
    nominative adjective tag and index 1 with a masculine singular nominative
    animate noun tag; the comparison itself is delegated to assert_tags_equal.
    """
    crf_tagger = CRFTagger()
    adjective = Tag(
        pos=PartOfSpeech.ADJECTIVE,
        case=Case.NOMINATIVE,
        degree=Degree.POSITIVE,
        gender=Gender.MASCULINE,
        number=Number.SINGULAR,
        variant=Variant.FULL,
    )
    noun = Tag(
        pos=PartOfSpeech.NOUN,
        animacy=Animacy.ANIMATE,
        case=Case.NOMINATIVE,
        gender=Gender.MASCULINE,
        number=Number.SINGULAR,
    )
    assert_tags_equal(
        tagger=crf_tagger,
        expected=[(0, adjective), (1, noun)],
        words=['настоящий', 'детектив'],
    )

Exemple #3

0

Afficher le fichier

def test():
    """Run RNNTagger on 'необычные дела' and compare against the expected tags.

    The expected result pairs word index 0 with a plural nominative adjective
    tag (no gender given) and index 1 with a neuter plural nominative
    inanimate noun tag; the comparison is delegated to assert_tags_equal.
    """
    rnn_tagger = RNNTagger()
    adjective = Tag(
        pos=PartOfSpeech.ADJECTIVE,
        case=Case.NOMINATIVE,
        degree=Degree.POSITIVE,
        number=Number.PLURAL,
        variant=Variant.FULL,
    )
    noun = Tag(
        pos=PartOfSpeech.NOUN,
        animacy=Animacy.INANIMATE,
        case=Case.NOMINATIVE,
        gender=Gender.NEUTER,
        number=Number.PLURAL,
    )
    assert_tags_equal(
        tagger=rnn_tagger,
        expected=[(0, adjective), (1, noun)],
        words=['необычные', 'дела'],
    )

Exemple #4

0

Afficher le fichier

def test_tag():
    """Analyze ['hello'] with a ConstantTagger that assigns a NOUN tag to 'hello'.

    The single expected Morph carries that same tag and uses the word itself
    as its lemma.
    """
    noun = Tag(pos=PartOfSpeech.NOUN)
    expected_morphs = [Morph(word='hello', lemma='hello', tag=noun)]
    hello_tagger = ConstantTagger(word='hello', tag=noun)

    _assert_analyzed_equal(
        expected=expected_morphs,
        taggers=[hello_tagger],
        text=['hello'],
    )

Exemple #5

0

Afficher le fichier

def test_tag_partially():
    """Analyze ['hello', 'world'] when the only tagger is configured for 'world'.

    'world' is expected to receive the ADJECTIVE tag given to the
    ConstantTagger, while 'hello' is expected to carry _UNKNOWN.
    """
    adjective = Tag(pos=PartOfSpeech.ADJECTIVE)
    untagged_hello = Morph(word='hello', lemma='hello', tag=_UNKNOWN)
    tagged_world = Morph(word='world', lemma='world', tag=adjective)
    world_tagger = ConstantTagger(word='world', tag=adjective)

    _assert_analyzed_equal(
        expected=[untagged_hello, tagged_world],
        taggers=[world_tagger],
        text=['hello', 'world'],
    )

Exemple #6

0

Afficher le fichier

Fichier : tag.py Projet : gilyazutdinov/maru

def get_tag(parse: pymorphy2.analyzer.Parse) -> Tag:
    """Build a Tag from a pymorphy2 parse.

    Each Tag field is filled by the matching ``get_*`` helper, every one of
    which receives the same ``parse`` object.
    """
    # Helpers are called in the same order as the keyword arguments below.
    grammemes = {
        'pos': get_part_of_speech(parse),
        'animacy': get_animacy(parse),
        'aspect': get_aspect(parse),
        'case': get_case(parse),
        'degree': get_degree(parse),
        'gender': get_gender(parse),
        'mood': get_mood(parse),
        'number': get_number(parse),
        'person': get_person(parse),
        'tense': get_tense(parse),
        'verbform': get_verbform(parse),
        'voice': get_voice(parse),
    }
    return Tag(**grammemes)

Exemple #7

0

Afficher le fichier

import re
from typing import Iterator

from maru.grammeme import PartOfSpeech
from maru.grammeme.numform import NumericalForm
from maru.tag import Tag
from maru.tagger.abstract import ITagger, Tagged
from maru.types import Text, Indices

# Token pattern with one named group per numerical form: digits separated by
# '.' or ',' for REAL, plain digits for INTEGER.  Both alternatives are
# anchored with `$`, so the whole token must match.
# Raw f-strings are required here: in a non-raw literal `\d` is an invalid
# string escape (a warning today, slated to become an error), whereas the
# regex engine must receive the backslash untouched.
_REGEX = re.compile(rf'(?P<{NumericalForm.REAL}>\d+[.,]\d+$)|'
                    rf'(?P<{NumericalForm.INTEGER}>\d+$)')

# Tags are created once at import time and reused for every matching token.
_INTEGER = Tag(pos=PartOfSpeech.NUMERICAL, numform=NumericalForm.INTEGER)
_REAL = Tag(pos=PartOfSpeech.NUMERICAL, numform=NumericalForm.REAL)


class NumericalTagger(ITagger):
    """Tagger that yields a numerical Tag for tokens matched by _REGEX."""

    def tag(self, text: Text, indices: Indices) -> Iterator[Tagged]:
        """Yield ``(index, tag)`` for each requested index whose token matches.

        Indices whose token does not match _REGEX produce nothing.  A match in
        the REAL group yields _REAL; any other match yields _INTEGER.
        """
        for position in indices:
            found = _REGEX.match(text[position])
            if found is None:
                continue

            if found.lastgroup == NumericalForm.REAL:
                yield position, _REAL
            else:
                yield position, _INTEGER

Exemple #8

0

Afficher le fichier

Fichier : test_punctuation.py Projet : janyfe/maru

import pytest

from maru.grammeme import PartOfSpeech
from maru.tag import Tag
from maru.tagger.punctuation import PunctuationTagger
from tests.tagger.base import TaggerTest

# Shared Tag whose part of speech is PUNCTUATION, built once for reuse in the
# expected results of the test cases in this module.
_PUNCTUATION = Tag(pos=PartOfSpeech.PUNCTUATION)


@pytest.fixture(name='tagger', scope='session')
def create_tagger():
    """Return a PunctuationTagger; registered as the session-scoped `tagger` fixture."""
    punctuation_tagger = PunctuationTagger()
    return punctuation_tagger


@pytest.mark.parametrize(
    'test',
    [
        TaggerTest(
            words=['!', '@', '.....,'],
            tags=[(0, _PUNCTUATION), (1, _PUNCTUATION), (2, _PUNCTUATION)],
        ),
        TaggerTest(
            words=['?!', '"', ':', ';'],
            tags=[
                (0, _PUNCTUATION),
                (1, _PUNCTUATION),
                (2, _PUNCTUATION),
                (3, _PUNCTUATION),
            ],
        ),

Exemple #9

0

Afficher le fichier

def test():
    """Check that PymorphyLemmatizer lemmatizes 'мыло' with a VERB tag to 'мыть'."""
    lemmatizer = PymorphyLemmatizer()
    lemma = lemmatizer.lemmatize('мыло', Tag(pos=PartOfSpeech.VERB))

    assert lemma == 'мыть'

Exemple #10

0

Afficher le fichier

Fichier : test_rnn.py Projet : janyfe/maru

@pytest.fixture(name='tagger', scope='session')
def create_tagger():
    """Return an RNNTagger; registered as the session-scoped `tagger` fixture."""
    rnn_tagger = RNNTagger()
    return rnn_tagger


@pytest.mark.parametrize(
    'test',
    [
        TaggerTest(
            words=['необычные', 'дела'],
            tags=[
                (
                    0,
                    Tag(
                        pos=PartOfSpeech.ADJECTIVE,
                        case=Case.NOMINATIVE,
                        degree=Degree.POSITIVE,
                        number=Number.PLURAL,
                        variant=Variant.FULL,
                    ),
                ),
                (
                    1,
                    Tag(
                        pos=PartOfSpeech.NOUN,
                        animacy=Animacy.INANIMATE,
                        case=Case.NOMINATIVE,
                        gender=Gender.NEUTER,
                        number=Number.PLURAL,
                    ),

Exemple #11

0

Afficher le fichier

Fichier : test_analyzer.py Projet : gilyazutdinov/maru

from typing import Sequence

from maru.grammeme import PartOfSpeech
from maru.lemmatizer import DummyLemmatizer
from maru.morph import Morph
from maru.analyzer import Analyzer
from maru.tag import Tag
from maru.tagger import ITagger
from maru.types import Text

from tests.stubs.tagger import ConstantTagger

# Shared Tag whose part of speech is UNKNOWN, built once for reuse in the
# expected results of the tests in this module.
_UNKNOWN = Tag(pos=PartOfSpeech.UNKNOWN)


def _assert_analyzed_equal(
    expected: Sequence[Morph],
    taggers: Sequence[ITagger],
    text: Text,
):
    """Assert that analyzing ``text`` with ``taggers`` produces ``expected``.

    An Analyzer is built from the given taggers together with a
    DummyLemmatizer; its ``analyze`` output is collected into a list and
    compared for equality with ``expected``.
    """
    lemmatizer = DummyLemmatizer()
    analyzer = Analyzer(taggers, lemmatizer=lemmatizer)
    actual = list(analyzer.analyze(text))

    assert expected == actual


def test_unknown():
    _assert_analyzed_equal(
        expected=[
            Morph(
                word='hello',
                lemma='hello',
                tag=_UNKNOWN,

Exemple #12

0

Afficher le fichier

import re
from typing import Iterator

from maru.grammeme import PartOfSpeech
from maru.grammeme.numform import NumericalForm
from maru.tag import Tag
from maru.tagger.abstract import ITagger, Tagged
from maru.types import Indices, Text

# Token pattern with one named group per numerical form.  The name of the
# group that matched is later turned back into a NumericalForm.
# NOTE(review): the REAL and INTEGER alternatives end in `$`, the RANGE one
# does not, so with `match` a range followed by extra characters still
# matches -- confirm this is intended.
_REGEX = re.compile(
    rf'(?P<{NumericalForm.REAL}>\d+[.,/]\d+$)|'
    rf'(?P<{NumericalForm.INTEGER}>\d+$)|'
    rf'(?P<{NumericalForm.RANGE}>\d+[‑–—−-]\d+)'
)

# One prebuilt NUMERICAL Tag per numerical form, keyed by that form.
_TAGS = {
    form: Tag(pos=PartOfSpeech.NUMERICAL, numform=form)
    for form in (
        NumericalForm.REAL,
        NumericalForm.INTEGER,
        NumericalForm.RANGE,
    )
}


class NumericalTagger(ITagger):
    """Tagger that yields a numerical Tag for tokens matched by _REGEX."""

    def tag(self, text: Text, indices: Indices) -> Iterator[Tagged]:
        """Yield ``(index, tag)`` for each requested index whose token matches.

        Indices whose token does not match _REGEX produce nothing.  The name
        of the last matched group is converted to a NumericalForm, which
        selects the tag from _TAGS.
        """
        for position in indices:
            found = _REGEX.match(text[position])
            if found is None:
                continue

            matched_form = NumericalForm(found.lastgroup)
            yield position, _TAGS[matched_form]