def test_eq__(datadir):
    """Check ``==`` between statistics built from ``data.conllx`` and a hand-built instance."""
    actual = CorpusStatistics.create_from_corpus(
        Corpus.read_from_file(datadir / "data.conllx")
    )

    # Mapping handed to ``entity_types``: label -> Counter of character tuples.
    types_to_values = {
        "PERSON": Counter({("王", "小", "明"): 2}),
        "GPE": Counter({("北", "京"): 2}),
        "ORG": Counter({("清", "华", "大", "学"): 2}),
        "歌手名": Counter({("蓝", "泽", "雨"): 2}),
    }
    # Mapping handed to ``entity_values``: character tuple -> Counter of labels.
    values_to_types = {
        ("王", "小", "明"): Counter({"PERSON": 2}),
        ("北", "京"): Counter({"GPE": 2}),
        ("清", "华", "大", "学"): Counter({"ORG": 2}),
        ("蓝", "泽", "雨"): Counter({"歌手名": 2}),
    }

    reference = CorpusStatistics(
        domain=Counter({"domain_one": 2, "domain_two": 2}),
        function=Counter({"function_one": 2, "function_two": 2}),
        sub_function=Counter({"sub_function_one": 2, "sub_function_two": 2}),
        intent=Counter({"intent_one": 2, "intent_two": 2}),
        entity_types=types_to_values,
        entity_values=values_to_types,
    )

    # Keep the object under test on the left so its ``__eq__`` is the one invoked.
    assert actual == reference
def test_create_from_corpus(datadir):
    """``create_from_corpus`` must populate every statistic from ``data.conllx``.

    The expected values mirror what the other tests in this file assert for
    the same data file. Comparing against an instance whose fields are all
    ``None`` could only pass if equality ignored the collected content, so
    the populated expectation is used instead.
    """
    corpus = Corpus.read_from_file(datadir / "data.conllx")

    corpus_statistics = CorpusStatistics.create_from_corpus(corpus)

    expected = CorpusStatistics(
        domain=Counter({"domain_one": 2, "domain_two": 2}),
        function=Counter({"function_one": 2, "function_two": 2}),
        sub_function=Counter({"sub_function_one": 2, "sub_function_two": 2}),
        intent=Counter({"intent_one": 2, "intent_two": 2}),
        entity_types={
            "PERSON": Counter({("王", "小", "明"): 2}),
            "GPE": Counter({("北", "京"): 2}),
            "ORG": Counter({("清", "华", "大", "学"): 2}),
            "歌手名": Counter({("蓝", "泽", "雨"): 2}),
        },
        entity_values={
            ("王", "小", "明"): Counter({"PERSON": 2}),
            ("北", "京"): Counter({"GPE": 2}),
            ("清", "华", "大", "学"): Counter({"ORG": 2}),
            ("蓝", "泽", "雨"): Counter({"歌手名": 2}),
        },
    )

    assert corpus_statistics == expected
def test_collect_intent(datadir):
    """The ``intent`` attribute of statistics built from ``data.conllx`` matches the expected Counter."""
    statistics = CorpusStatistics.create_from_corpus(
        Corpus.read_from_file(datadir / "data.conllx")
    )

    observed = statistics.intent

    assert observed == Counter({'intent_one': 2, 'intent_two': 2})
def test_collect_sub_function(datadir):
    """The ``sub_function`` attribute of statistics built from ``data.conllx`` matches the expected Counter."""
    statistics = CorpusStatistics.create_from_corpus(
        Corpus.read_from_file(datadir / "data.conllx")
    )

    observed = statistics.sub_function

    assert observed == Counter({'sub_function_one': 2, 'sub_function_two': 2})
def test_collect_domain(datadir):
    """The ``domain`` attribute of statistics built from ``data.conllx`` matches the expected Counter."""
    statistics = CorpusStatistics.create_from_corpus(
        Corpus.read_from_file(datadir / "data.conllx")
    )

    observed = statistics.domain

    assert observed == Counter({"domain_one": 2, "domain_two": 2})
def test_collect_entity_values(datadir):
    """The ``entity_values`` attribute of statistics built from ``data.conllx`` matches the expected mapping."""
    statistics = CorpusStatistics.create_from_corpus(
        Corpus.read_from_file(datadir / "data.conllx")
    )

    observed = statistics.entity_values

    # Character tuple -> Counter of labels; each label is counted twice.
    wanted = {
        ("王", "小", "明"): Counter({"PERSON": 2}),
        ("北", "京"): Counter({"GPE": 2}),
        ("清", "华", "大", "学"): Counter({"ORG": 2}),
        ("蓝", "泽", "雨"): Counter({"歌手名": 2}),
    }

    assert observed == wanted
# Example no. 7
# 0
 def generate_statistics(self) -> CorpusStatistics:
     """Return the result of ``CorpusStatistics.create_from_corpus`` called on this object."""
     statistics = CorpusStatistics.create_from_corpus(self)
     return statistics