def test_should_find_lemma_concordance(self):
        """Verify ``offsets`` lookups, both plain and narrowed by category.

        Each entry pairs the arguments passed to ``offsets`` with the list of
        offsets that must come back for ``self.word_list``.
        """
        concordance = DictionaryItemConcordanceIndex(self.word_list)
        pronoun = SyntacticCategory.PRONOUN

        # (arguments for ``offsets``, expected offsets) -- checked in this order.
        expectations = [
            ((u'something',), []),

            ((u"o",), [0, 1, 2, 3, 4]),
            ((u"o", pronoun), [0, 1, 3, 4]),
            ((u"o", SyntacticCategory.DETERMINER), [2]),
            ((u"o", pronoun, SecondarySyntacticCategory.PERSONAL), [0, 3]),
            ((u"o", pronoun, SecondarySyntacticCategory.DEMONSTRATIVE), [1, 4]),

            ((u"git",), [6, 7, 8, 9, 10, 11]),
            ((u"gid",), []),
            ((u"git", SyntacticCategory.VERB), [6, 7, 8, 9, 10, 11]),
        ]

        for query, expected_offsets in expectations:
            assert_that(concordance.offsets(*query), equal_to(expected_offsets))
Example #2
0
    def test_should_find_lemma_concordance(self):
        """Verify ``offsets`` lookups, both plain and narrowed by category.

        The index is built over ``self.word_list``; every check compares the
        offsets returned for a query against a fixed expected list.
        """
        index = DictionaryItemConcordanceIndex(self.word_list)

        def expect_offsets(expected, *query):
            # One concordance lookup compared against its expected offsets.
            assert_that(index.offsets(*query), equal_to(expected))

        pronoun = SyntacticCategory.PRONOUN
        personal = SecondarySyntacticCategory.PERSONAL
        demonstrative = SecondarySyntacticCategory.DEMONSTRATIVE

        # A key the test expects to have no offsets at all.
        expect_offsets([], u'something')

        # u"o": unfiltered, then by primary and by secondary category.
        expect_offsets([0, 1, 2, 3, 4], u"o")
        expect_offsets([0, 1, 3, 4], u"o", pronoun)
        expect_offsets([2], u"o", SyntacticCategory.DETERMINER)
        expect_offsets([0, 3], u"o", pronoun, personal)
        expect_offsets([1, 4], u"o", pronoun, demonstrative)

        # u"git" has offsets, while the lookup for u"gid" must come back empty.
        expect_offsets([6, 7, 8, 9, 10, 11], u"git")
        expect_offsets([], u"gid")
        expect_offsets([6, 7, 8, 9, 10, 11], u"git", SyntacticCategory.VERB)
Example #3
0
    def _validate_lemma_concordance_indexes(self, word_list):
        """Check that the concordance index only points at matching words.

        Builds a ``DictionaryItemConcordanceIndex`` over ``word_list`` and walks
        its nested offset table (lemma root -> syntactic category -> secondary
        syntactic category). At every level the offsets returned by
        ``idx.offsets(...)`` are resolved against ``word_list``, and the root of
        each resolved word must carry exactly the attributes used for the
        lookup.

        :param word_list: sequence indexable by offset; each word must expose
            ``root.lemma_root``, ``root.syntactic_category`` and
            ``root.secondary_syntactic_category``.
        :raises AssertionError: if any offset resolves to a word whose root
            does not match the key(s) it was looked up with.
        """
        idx = DictionaryItemConcordanceIndex(word_list)

        def roots_at(*query):
            # Roots of the words that the index reports for ``query``.
            return [word_list[offset].root for offset in idx.offsets(*query)]

        # Iterate the mappings directly rather than through ``iterkeys()``:
        # this yields the same keys on Python 2 and still works on Python 3,
        # where ``iterkeys`` no longer exists. A single nested walk replaces
        # the three separate traversals of the same table.
        by_lemma_root = idx._offsets._indices
        for lemma_root in by_lemma_root:
            assert_that(
                all(root.lemma_root == lemma_root
                    for root in roots_at(lemma_root)),
                'offsets(%r) returned a word with another lemma root'
                % (lemma_root,))

            by_category = by_lemma_root[lemma_root]
            for syntactic_category in by_category:
                assert_that(
                    all(root.lemma_root == lemma_root
                        and root.syntactic_category == syntactic_category
                        for root in roots_at(lemma_root, syntactic_category)),
                    'offsets(%r, %r) returned a non-matching word'
                    % (lemma_root, syntactic_category))

                for secondary_syntactic_category in by_category[
                        syntactic_category]:
                    assert_that(
                        all(root.lemma_root == lemma_root
                            and root.syntactic_category == syntactic_category
                            and root.secondary_syntactic_category
                            == secondary_syntactic_category
                            for root in roots_at(
                                lemma_root, syntactic_category,
                                secondary_syntactic_category)),
                        'offsets(%r, %r, %r) returned a non-matching word'
                        % (lemma_root, syntactic_category,
                           secondary_syntactic_category))
Example #4
0
    def _validate_lemma_concordance_indexes(self, word_list):
        """Check that the concordance index only points at matching words.

        Builds a ``DictionaryItemConcordanceIndex`` over ``word_list`` and walks
        its nested offset table (lemma root -> syntactic category -> secondary
        syntactic category). At every level the offsets returned by
        ``idx.offsets(...)`` are resolved against ``word_list``, and the root of
        each resolved word must carry exactly the attributes used for the
        lookup.

        :param word_list: sequence indexable by offset; each word must expose
            ``root.lemma_root``, ``root.syntactic_category`` and
            ``root.secondary_syntactic_category``.
        :raises AssertionError: if any offset resolves to a word whose root
            does not match the key(s) it was looked up with.
        """
        idx = DictionaryItemConcordanceIndex(word_list)

        def roots_at(*query):
            # Roots of the words that the index reports for ``query``.
            return [word_list[offset].root for offset in idx.offsets(*query)]

        # Iterate the mappings directly rather than through ``iterkeys()``:
        # this yields the same keys on Python 2 and still works on Python 3,
        # where ``iterkeys`` no longer exists. A single nested walk replaces
        # the three separate traversals of the same table.
        by_lemma_root = idx._offsets._indices
        for lemma_root in by_lemma_root:
            assert_that(
                all(root.lemma_root == lemma_root
                    for root in roots_at(lemma_root)),
                'offsets(%r) returned a word with another lemma root'
                % (lemma_root,))

            by_category = by_lemma_root[lemma_root]
            for syntactic_category in by_category:
                assert_that(
                    all(root.lemma_root == lemma_root
                        and root.syntactic_category == syntactic_category
                        for root in roots_at(lemma_root, syntactic_category)),
                    'offsets(%r, %r) returned a non-matching word'
                    % (lemma_root, syntactic_category))

                for secondary_syntactic_category in by_category[
                        syntactic_category]:
                    assert_that(
                        all(root.lemma_root == lemma_root
                            and root.syntactic_category == syntactic_category
                            and root.secondary_syntactic_category
                            == secondary_syntactic_category
                            for root in roots_at(
                                lemma_root, syntactic_category,
                                secondary_syntactic_category)),
                        'offsets(%r, %r, %r) returned a non-matching word'
                        % (lemma_root, syntactic_category,
                           secondary_syntactic_category))