def test_should_find_root_concordance(self):
        """Check ``RootConcordanceIndex.offsets`` against the ``self.word_list`` fixture.

        The index is queried by root string alone, by root string plus
        syntactic category, and by root string plus syntactic category plus
        secondary syntactic category. An unknown root must yield an empty
        list of offsets.

        NOTE(review): ``self.word_list`` is created outside this method; the
        expected offsets below are only meaningful for that fixture.
        """
        concordance = RootConcordanceIndex(self.word_list)

        pronoun = SyntacticCategory.PRONOUN

        # Each row: (positional arguments for ``offsets``, offsets expected back).
        expectations = [
            ((u'something',), []),

            ((u"o",), [0, 1, 2, 3, 4]),
            ((u"o", pronoun), [0, 1, 3, 4]),
            ((u"o", SyntacticCategory.DETERMINER), [2]),
            ((u"o", pronoun, SecondarySyntacticCategory.PERSONAL), [0, 3]),
            ((u"o", pronoun, SecondarySyntacticCategory.DEMONSTRATIVE), [1, 4]),

            ((u"git",), [6]),
            ((u"gid",), [7, 8, 9, 10, 11]),
            ((u"gid", SyntacticCategory.VERB), [7, 8, 9, 10, 11]),
        ]

        for lookup_args, expected_offsets in expectations:
            found = concordance.offsets(*lookup_args)
            assert_that(found, equal_to(expected_offsets))
Example #2
0
    def _validate_root_concordance_indexes(self, word_list):
        """Assert that a root concordance index only points at matching words.

        A ``RootConcordanceIndex`` is built over ``word_list``. For every key
        stored in the index, the offsets returned by ``offsets`` are resolved
        back into ``word_list`` and the words found there are checked against
        the key that was used for the lookup. Three passes are made, from the
        coarsest key to the finest:

        1. root string
        2. root string + syntactic category
        3. root string + syntactic category + secondary syntactic category

        :param word_list: sequence indexable by offset; every item must
            expose ``root.str``, ``root.syntactic_category`` and
            ``root.secondary_syntactic_category``.
        """
        concordance = RootConcordanceIndex(word_list)

        # The keys are taken from the index's private nested mapping
        # (root string -> syntactic category -> secondary syntactic category).
        index_tree = concordance._offsets._indices

        # Pass 1: lookup by root string only.
        for root in index_tree.iterkeys():
            positions = concordance.offsets(root)
            hits = [word_list[position] for position in positions]
            root_ok = [hit.root.str == root for hit in hits]
            assert_that(all(root_ok))

        # Pass 2: lookup narrowed by syntactic category.
        for root in index_tree.iterkeys():
            for category in index_tree[root].iterkeys():
                positions = concordance.offsets(root, category)
                hits = [word_list[position] for position in positions]
                category_ok = [
                    hit.root.str == root
                    and hit.root.syntactic_category == category
                    for hit in hits
                ]
                assert_that(all(category_ok))

        # Pass 3: lookup narrowed further by secondary syntactic category.
        for root in index_tree.iterkeys():
            for category in index_tree[root].iterkeys():
                for secondary in index_tree[root][category].iterkeys():
                    positions = concordance.offsets(root, category, secondary)
                    hits = [word_list[position] for position in positions]
                    secondary_ok = [
                        hit.root.str == root
                        and hit.root.syntactic_category == category
                        and hit.root.secondary_syntactic_category == secondary
                        for hit in hits
                    ]
                    assert_that(all(secondary_ok))
Example #3
0
    def _validate_root_concordance_indexes(self, word_list):
        """Cross-check a ``RootConcordanceIndex`` against the words it indexes.

        Every key combination stored in the index is looked up through
        ``offsets``; each returned offset is resolved in ``word_list`` and
        the word's root is compared with the lookup key. The check runs once
        per key depth: root string, then root string with syntactic category,
        then both of those with secondary syntactic category.

        :param word_list: sequence indexable by offset whose items carry a
            ``root`` with ``str``, ``syntactic_category`` and
            ``secondary_syntactic_category`` attributes.
        """
        index = RootConcordanceIndex(word_list)

        def words_for(*lookup_key):
            # Resolve the offsets stored under ``lookup_key`` back into words.
            return [word_list[offset] for offset in index.offsets(*lookup_key)]

        # Depth 1: root string.
        for root_key in index._offsets._indices.iterkeys():
            matched = words_for(root_key)
            assert_that(all([item.root.str == root_key for item in matched]))

        # Depth 2: root string + syntactic category.
        for root_key in index._offsets._indices.iterkeys():
            for primary in index._offsets._indices[root_key].iterkeys():
                matched = words_for(root_key, primary)
                assert_that(all([
                    item.root.str == root_key
                    and item.root.syntactic_category == primary
                    for item in matched
                ]))

        # Depth 3: root string + syntactic category + secondary category.
        for root_key in index._offsets._indices.iterkeys():
            for primary in index._offsets._indices[root_key].iterkeys():
                secondaries = index._offsets._indices[root_key][primary]
                for secondary in secondaries.iterkeys():
                    matched = words_for(root_key, primary, secondary)
                    assert_that(all([
                        item.root.str == root_key
                        and item.root.syntactic_category == primary
                        and item.root.secondary_syntactic_category == secondary
                        for item in matched
                    ]))
Example #4
0
    def test_should_find_root_concordance(self):
        """Exercise ``RootConcordanceIndex.offsets`` at each lookup depth.

        Lookups are made with a root string only, with a syntactic category
        added, and with a secondary syntactic category added on top of that.
        A root that is not in the index is expected to give ``[]``.

        NOTE(review): the expected offsets depend on ``self.word_list``,
        which is prepared outside this method.
        """
        index = RootConcordanceIndex(self.word_list)

        def expect(expected_offsets, *lookup):
            # One assertion per lookup: offsets(*lookup) must equal the list.
            assert_that(index.offsets(*lookup), equal_to(expected_offsets))

        # Unknown root.
        expect([], u'something')

        # Root "o", progressively narrowed.
        expect([0, 1, 2, 3, 4], u"o")
        expect([0, 1, 3, 4], u"o", SyntacticCategory.PRONOUN)
        expect([2], u"o", SyntacticCategory.DETERMINER)
        expect([0, 3], u"o", SyntacticCategory.PRONOUN,
               SecondarySyntacticCategory.PERSONAL)
        expect([1, 4], u"o", SyntacticCategory.PRONOUN,
               SecondarySyntacticCategory.DEMONSTRATIVE)

        # Roots "git" and "gid".
        expect([6], u"git")
        expect([7, 8, 9, 10, 11], u"gid")
        expect([7, 8, 9, 10, 11], u"gid", SyntacticCategory.VERB)