Beispiel #1
0
    def _validate_transition_matched_word_concordance_indexes(self, word_list):
        """Check that every key stored in the index really occurs in ``word_list``.

        A ``TransitionMatchedWordConcordanceIndex`` is built over
        ``word_list``. For every transition word the index holds, the words at
        the offsets reported by ``idx.offsets`` must contain at least one
        suffix whose ``matched_word`` equals that transition word. The same
        check is then repeated for every (transition word, syntactic category)
        pair, additionally requiring the suffix's ``to_syntactic_category`` to
        equal the category.

        :param word_list: sequence indexable by the offsets the index returns;
            each item exposes ``suffixes`` whose elements carry
            ``matched_word`` and ``to_syntactic_category``.

        Failures are reported through ``assert_that`` with a message naming
        the offending transition word (and category).
        """
        idx = TransitionMatchedWordConcordanceIndex(word_list)

        # NOTE(review): this reaches into the index's private state and
        # assumes ``_indices`` is a dict-like mapping of
        # transition word -> {syntactic category -> ...}; confirm against the
        # index implementation.
        indices = idx._offsets._indices

        # Iterating a mapping yields its keys on both Python 2 and Python 3,
        # whereas ``iterkeys()`` exists only on Python 2 dicts.
        for transition_word in indices:
            offsets = idx.offsets(transition_word)
            words = [word_list[offset] for offset in offsets]
            assert_that(
                any(suffix.matched_word == transition_word
                    for word in words
                    for suffix in word.suffixes),
                u'Transition word {} not found'.format(transition_word))

        for transition_word in indices:
            for syntactic_category in indices[transition_word]:
                offsets = idx.offsets(transition_word, syntactic_category)
                words = [word_list[offset] for offset in offsets]
                assert_that(
                    any(suffix.matched_word == transition_word
                        and suffix.to_syntactic_category == syntactic_category
                        for word in words
                        for suffix in word.suffixes),
                    u'Transition word {}+{} not found'.format(
                        transition_word, syntactic_category))
Beispiel #2
0
    def _validate_transition_matched_word_concordance_indexes(self, word_list):
        """Assert that the concordance index only holds keys backed by real words.

        Builds a ``TransitionMatchedWordConcordanceIndex`` from ``word_list``
        and runs two passes over the keys it stores:

        1. for each transition word, some suffix of the words found at
           ``idx.offsets(transition_word)`` must have that ``matched_word``;
        2. for each (transition word, syntactic category) pair, some suffix of
           the words found at ``idx.offsets(transition_word, category)`` must
           match both ``matched_word`` and ``to_syntactic_category``.

        :param word_list: sequence of words, indexable by the offsets the
            index reports; every word exposes an iterable ``suffixes``.

        A failed check is raised by ``assert_that`` with a message that names
        the transition word (and category) that could not be found.
        """
        idx = TransitionMatchedWordConcordanceIndex(word_list)

        # Plain iteration over the mapping replaces the Python-2-only
        # ``iterkeys()`` so the check also runs on Python 3.
        # NOTE(review): assumes ``idx._offsets._indices`` is a dict-like
        # mapping whose values are themselves mappings keyed by syntactic
        # category - confirm against the index implementation.
        for transition_word in idx._offsets._indices:
            offsets = idx.offsets(transition_word)
            words = [word_list[offset] for offset in offsets]
            # Generator expression: stop at the first matching suffix instead
            # of building the full list of booleans first.
            word_found = any(
                suffix.matched_word == transition_word
                for word in words
                for suffix in word.suffixes
            )
            assert_that(
                word_found,
                u'Transition word {} not found'.format(transition_word))

        for transition_word in idx._offsets._indices:
            for syntactic_category in idx._offsets._indices[transition_word]:
                offsets = idx.offsets(transition_word, syntactic_category)
                words = [word_list[offset] for offset in offsets]
                pair_found = any(
                    suffix.matched_word == transition_word
                    and suffix.to_syntactic_category == syntactic_category
                    for word in words
                    for suffix in word.suffixes
                )
                assert_that(
                    pair_found,
                    u'Transition word {}+{} not found'.format(transition_word, syntactic_category))
Beispiel #3
0
    def test_should_find_transition_matched_word_concordance(self):
        """Check the offsets the transition-matched-word index reports.

        The index is built from ``self.word_list``. Each case below pairs the
        positional arguments given to ``offsets`` (matched word, optionally a
        syntactic category and a secondary syntactic category) with the exact
        list of offsets expected back; cases run in the listed order.
        """
        concordance = TransitionMatchedWordConcordanceIndex(self.word_list)

        pronoun = SyntacticCategory.PRONOUN
        determiner = SyntacticCategory.DETERMINER
        personal = SecondarySyntacticCategory.PERSONAL
        demonstrative = SecondarySyntacticCategory.DEMONSTRATIVE
        verb = SyntacticCategory.VERB
        noun = SyntacticCategory.NOUN

        cases = [
            # a word that is not in the index yields no offsets
            ((u'something',), []),

            ((u"o",), [0, 1, 3, 4]),
            ((u"o", pronoun), [0, 1, 3, 4]),
            ((u"o", determiner), []),
            ((u"o", pronoun, personal), [0, 3]),
            ((u"o", pronoun, demonstrative), [1, 4]),

            ((u"git",), [6, 7, 8, 9, 10, 11]),
            ((u"gid",), []),
            ((u"git", verb), [6, 7, 8, 9, 10, 11]),

            ((u"gidecek",), [8, 10]),
            ((u"gidecek", noun), [10]),
            ((u"gidecek", verb), [8]),

            ((u"gideceğ",), [9, 11]),
            ((u"gideceğ", noun), [11]),
            ((u"gideceğ", verb), [9]),
        ]

        for query, expected_offsets in cases:
            assert_that(concordance.offsets(*query),
                        equal_to(expected_offsets))
    def test_should_find_transition_matched_word_concordance(self):
        """Verify ``offsets`` lookups on an index built from ``self.word_list``.

        Lookups are made by matched word alone, by matched word plus
        syntactic category, and by matched word plus syntactic and secondary
        syntactic category; each must return exactly the expected offsets.
        """
        index = TransitionMatchedWordConcordanceIndex(self.word_list)

        def expect_offsets(expected, *query):
            # Look up ``query`` in the index and compare with ``expected``.
            assert_that(index.offsets(*query), equal_to(expected))

        # a word that is not in the index
        expect_offsets([], u'something')

        # "o": pronoun only, split between personal and demonstrative
        expect_offsets([0, 1, 3, 4], u"o")
        expect_offsets([0, 1, 3, 4], u"o", SyntacticCategory.PRONOUN)
        expect_offsets([], u"o", SyntacticCategory.DETERMINER)
        expect_offsets(
            [0, 3],
            u"o", SyntacticCategory.PRONOUN, SecondarySyntacticCategory.PERSONAL)
        expect_offsets(
            [1, 4],
            u"o", SyntacticCategory.PRONOUN, SecondarySyntacticCategory.DEMONSTRATIVE)

        # "git" is indexed, "gid" is not
        expect_offsets([6, 7, 8, 9, 10, 11], u"git")
        expect_offsets([], u"gid")
        expect_offsets([6, 7, 8, 9, 10, 11], u"git", SyntacticCategory.VERB)

        # "gidecek": one noun reading, one verb reading
        expect_offsets([8, 10], u"gidecek")
        expect_offsets([10], u"gidecek", SyntacticCategory.NOUN)
        expect_offsets([8], u"gidecek", SyntacticCategory.VERB)

        # "gideceğ": one noun reading, one verb reading
        expect_offsets([9, 11], u"gideceğ")
        expect_offsets([11], u"gideceğ", SyntacticCategory.NOUN)
        expect_offsets([9], u"gideceğ", SyntacticCategory.VERB)