コード例 #1
0
    def test_get_rarity_ngrams__two_entries(self):
        # Scores two data entries against a four-entry corpus that holds
        # eight symbols in total. The expectation encodes, per entry, the
        # mean of its unigrams' relative frequencies in the corpus.
        reference = OrderedDict([
            (1, ["a", "x"]),
            (2, ["b", "b"]),
            (3, ["c", "c"]),
            (4, ["d", "d"]),
        ])
        entries = OrderedDict([
            (1, ["a", "b"]),
            (3, ["c", "c"]),
        ])

        # Corpus frequencies: "a" occurs once, "b" and "c" twice each.
        freq_a = 1 / 8
        freq_b = 2 / 8
        freq_c = 2 / 8
        expected = OrderedDict([
            (1, (freq_a + freq_b) / 2),
            (3, (freq_c + freq_c) / 2),
        ])

        actual = get_rarity_ngrams(
            data=entries,
            corpus=reference,
            n_gram=1,
            ignore_symbols=None,
        )

        self.assertEqual(expected, actual)
コード例 #2
0
    def test_get_rarity_ngrams__one_ignored_in_data__ignores_it(self):
        # The data entry contains "z", which is passed in ignore_symbols.
        # The expectation is that only "a" contributes to the score, i.e.
        # the ignored symbol is not counted in the average at all.
        reference = OrderedDict([
            (1, ["a", "x"]),
            (2, ["b", "b"]),
            (3, ["c", "c"]),
            (4, ["d", "d"]),
        ])
        entries = OrderedDict([
            (1, ["z", "a"]),
        ])
        ignored = {"z"}

        # "a" occurs once among the eight corpus symbols.
        freq_a = 1 / 8
        expected = OrderedDict([
            (1, freq_a),
        ])

        actual = get_rarity_ngrams(
            data=entries,
            corpus=reference,
            n_gram=1,
            ignore_symbols=ignored,
        )

        self.assertEqual(expected, actual)
コード例 #3
0
    def test_get_rarity_ngrams__not_existing__has_zero(self):
        # The data entry contains "z", which never occurs in the corpus and
        # is NOT ignored. The expectation is that it contributes a value of
        # zero while still being counted in the average.
        reference = OrderedDict([
            (1, ["a", "x"]),
            (2, ["b", "b"]),
            (3, ["c", "c"]),
            (4, ["d", "d"]),
        ])
        entries = OrderedDict([
            (1, ["z", "a"]),
        ])

        # "z" is absent from the corpus; "a" occurs once among eight symbols.
        freq_z = 0
        freq_a = 1 / 8
        expected = OrderedDict([
            (1, (freq_z + freq_a) / 2),
        ])

        actual = get_rarity_ngrams(
            data=entries,
            corpus=reference,
            n_gram=1,
            ignore_symbols=None,
        )

        self.assertEqual(expected, actual)
コード例 #4
0
    def test_get_rarity_ngrams__empty_entry__returns_inf(self):
        # A data entry without any symbols has nothing to average over; the
        # expectation is that it is reported with a value of math.inf.
        reference = OrderedDict([
            (1, ["a", "x"]),
            (2, ["b", "b"]),
            (3, ["c", "c"]),
            (4, ["d", "d"]),
        ])
        entries = OrderedDict([
            (1, []),
        ])

        expected = OrderedDict([
            (1, math.inf),
        ])

        actual = get_rarity_ngrams(
            data=entries,
            corpus=reference,
            n_gram=1,
            ignore_symbols=None,
        )

        self.assertEqual(expected, actual)