def _validate_transition_word_concordance_indexes(self, word_list):
    """Cross-check every key of the transition-word index against ``word_list``.

    A ``TransitionWordConcordanceIndex`` is built from ``word_list`` and its
    key structure (``idx._offsets._indices``) is walked in two passes:

    1. For each transition word key, the words found at the offsets the
       index reports must include at least one suffix whose ``word``
       equals that key.
    2. For each (transition word, syntactic category) key pair, the words
       at the reported offsets must include at least one suffix whose
       ``word`` and ``to_syntactic_category`` both match the pair.

    A failed check is reported through ``assert_that`` with a message that
    names the offending key.
    """
    idx = TransitionWordConcordanceIndex(word_list)
    # The index's internal key mapping is the only source of "every key that
    # was indexed", so it is read directly here.
    indices = idx._offsets._indices

    # Pass 1: transition word only.
    # Iterating the mapping itself yields its keys on both Python 2 and 3;
    # ``iterkeys()`` exists only on Python 2.
    for transition_word in indices:
        words = [word_list[offset] for offset in idx.offsets(transition_word)]
        assert_that(
            any(suffix.word == transition_word
                for word in words
                for suffix in word.suffixes),
            u'Transition word {} not found'.format(transition_word))

    # Pass 2: transition word narrowed by syntactic category.
    for transition_word in indices:
        for syntactic_category in indices[transition_word]:
            offsets = idx.offsets(transition_word, syntactic_category)
            words = [word_list[offset] for offset in offsets]
            assert_that(
                any(suffix.word == transition_word
                    and suffix.to_syntactic_category == syntactic_category
                    for word in words
                    for suffix in word.suffixes),
                u'Transition word {}+{} not found'.format(
                    transition_word, syntactic_category))
def _validate_transition_word_concordance_indexes(self, word_list):
    """Cross-check every key of the transition-word index against ``word_list``.

    A ``TransitionWordConcordanceIndex`` is built from ``word_list`` and its
    key structure (``idx._offsets._indices``) is walked in two passes:

    1. For each transition word key, the words found at the offsets the
       index reports must include at least one suffix whose ``word``
       equals that key.
    2. For each (transition word, syntactic category) key pair, the words
       at the reported offsets must include at least one suffix whose
       ``word`` and ``to_syntactic_category`` both match the pair.

    A failed check is reported through ``assert_that`` with a message that
    names the offending key.
    """
    idx = TransitionWordConcordanceIndex(word_list)
    # The index's internal key mapping is the only source of "every key that
    # was indexed", so it is read directly here.
    indices = idx._offsets._indices

    # Pass 1: transition word only.
    # Iterating the mapping itself yields its keys on both Python 2 and 3;
    # ``iterkeys()`` exists only on Python 2.
    for transition_word in indices:
        words = [word_list[offset] for offset in idx.offsets(transition_word)]
        assert_that(
            any(suffix.word == transition_word
                for word in words
                for suffix in word.suffixes),
            u'Transition word {} not found'.format(transition_word))

    # Pass 2: transition word narrowed by syntactic category.
    for transition_word in indices:
        for syntactic_category in indices[transition_word]:
            offsets = idx.offsets(transition_word, syntactic_category)
            words = [word_list[offset] for offset in offsets]
            assert_that(
                any(suffix.word == transition_word
                    and suffix.to_syntactic_category == syntactic_category
                    for word in words
                    for suffix in word.suffixes),
                u'Transition word {}+{} not found'.format(
                    transition_word, syntactic_category))
def test_should_find_transition_word_concordance(self):
    # Table-driven check of ``TransitionWordConcordanceIndex.offsets``: each
    # entry pairs the lookup arguments with the offsets expected back. The
    # entries are evaluated in order, so the first mismatch is the one reported.
    idx = TransitionWordConcordanceIndex(self.word_list)

    pronoun = SyntacticCategory.PRONOUN
    determiner = SyntacticCategory.DETERMINER
    personal = SecondarySyntacticCategory.PERSONAL
    demonstrative = SecondarySyntacticCategory.DEMONSTRATIVE
    verb = SyntacticCategory.VERB
    noun = SyntacticCategory.NOUN

    cases = [
        # A word that is not in the index at all.
        ((u'something',), []),

        # u"o": by word, by category, and by category + secondary category.
        ((u"o",), [0, 1, 3, 4]),
        ((u"o", pronoun), [0, 1, 3, 4]),
        ((u"o", determiner), []),
        ((u"o", pronoun, personal), [0, 3]),
        ((u"o", pronoun, demonstrative), [1, 4]),

        # u"git" is found, while u"gid" is not.
        ((u"git",), [6, 7, 8, 9, 10, 11]),
        ((u"gid",), []),
        ((u"git", verb), [6, 7, 8, 9, 10, 11]),

        # u"gidecek" is split between the noun and verb categories, while
        # u"gideceğ" is not found.
        ((u"gidecek",), [8, 9, 10, 11]),
        ((u"gidecek", noun), [10, 11]),
        ((u"gidecek", verb), [8, 9]),
        ((u"gideceğ",), []),
    ]

    for lookup_args, expected_offsets in cases:
        assert_that(idx.offsets(*lookup_args), equal_to(expected_offsets))
def test_should_find_transition_word_concordance(self):
    # Exercises ``TransitionWordConcordanceIndex.offsets`` with a word alone,
    # a word plus syntactic category, and a word plus category plus secondary
    # category, comparing each result with the expected offset list.
    idx = TransitionWordConcordanceIndex(self.word_list)

    def expect_offsets(expected, *lookup_args):
        # Single lookup-and-compare step shared by all the checks below.
        assert_that(idx.offsets(*lookup_args), equal_to(expected))

    # A word that is not in the index at all.
    expect_offsets([], u'something')

    # u"o": by word, by category, and by category + secondary category.
    expect_offsets([0, 1, 3, 4], u"o")
    expect_offsets([0, 1, 3, 4], u"o", SyntacticCategory.PRONOUN)
    expect_offsets([], u"o", SyntacticCategory.DETERMINER)
    expect_offsets([0, 3], u"o", SyntacticCategory.PRONOUN,
                   SecondarySyntacticCategory.PERSONAL)
    expect_offsets([1, 4], u"o", SyntacticCategory.PRONOUN,
                   SecondarySyntacticCategory.DEMONSTRATIVE)

    # u"git" is found, while u"gid" is not.
    expect_offsets([6, 7, 8, 9, 10, 11], u"git")
    expect_offsets([], u"gid")
    expect_offsets([6, 7, 8, 9, 10, 11], u"git", SyntacticCategory.VERB)

    # u"gidecek" is split between the noun and verb categories, while
    # u"gideceğ" is not found.
    expect_offsets([8, 9, 10, 11], u"gidecek")
    expect_offsets([10, 11], u"gidecek", SyntacticCategory.NOUN)
    expect_offsets([8, 9], u"gidecek", SyntacticCategory.VERB)
    expect_offsets([], u"gideceğ")