def _validate_complete_word_concordance_indexes(self, word_list):
    """Cross-check a freshly built concordance index against ``word_list``.

    A ``CompleteWordConcordanceIndex`` is built from ``word_list`` and its
    internal nested key structure (``_offsets._indices``) is walked at three
    depths.  For every key combination found there, the offsets returned by
    ``offsets(...)`` are resolved back into ``word_list`` and each resolved
    word must carry the attributes that were used for the lookup:

    1. word text only                       -> ``word.str``
    2. word text + syntactic category       -> plus ``word.syntactic_category``
    3. word text + both categories          -> plus
       ``word.secondary_syntactic_category``

    :param word_list: indexable sequence of word objects the index is built
        from; items are read via ``str``, ``syntactic_category`` and
        ``secondary_syntactic_category``.
    """
    index = CompleteWordConcordanceIndex(word_list)

    def words_at(*key):
        # Turn the offsets reported for ``key`` back into word_list entries.
        positions = index.offsets(*key)
        return [word_list[position] for position in positions]

    # Pass 1: lookups by word text alone.
    for surface in index._offsets._indices.iterkeys():
        found = words_at(surface)
        assert_that(all([w.str == surface for w in found]))

    # Pass 2: lookups narrowed by syntactic category.
    for surface in index._offsets._indices.iterkeys():
        for category in index._offsets._indices[surface].iterkeys():
            found = words_at(surface, category)
            assert_that(all([
                w.str == surface and w.syntactic_category == category
                for w in found
            ]))

    # Pass 3: lookups narrowed by syntactic and secondary syntactic category.
    for surface in index._offsets._indices.iterkeys():
        for category in index._offsets._indices[surface].iterkeys():
            for secondary in index._offsets._indices[surface][category].iterkeys():
                found = words_at(surface, category, secondary)
                assert_that(all([
                    w.str == surface
                    and w.syntactic_category == category
                    and w.secondary_syntactic_category == secondary
                    for w in found
                ]))
def _validate_complete_word_concordance_indexes(self, word_list):
    """Assert that every index entry points only at matching words.

    Builds a ``CompleteWordConcordanceIndex`` over ``word_list`` and then,
    for each key present in the index's internal ``_offsets._indices``
    mapping, verifies that the words found at the reported offsets agree
    with the lookup key.  The check is repeated at three levels of
    specificity: word text, word text plus syntactic category, and word
    text plus syntactic and secondary syntactic category.

    :param word_list: indexable sequence of word objects exposing ``str``,
        ``syntactic_category`` and ``secondary_syntactic_category``.
    """
    concordance = CompleteWordConcordanceIndex(word_list)

    # --- keyed by word text -------------------------------------------------
    for text in concordance._offsets._indices.iterkeys():
        positions = concordance.offsets(text)
        hits = [word_list[position] for position in positions]
        verdicts = [hit.str == text for hit in hits]
        assert_that(all(verdicts))

    # --- keyed by word text and syntactic category --------------------------
    for text in concordance._offsets._indices.iterkeys():
        for primary in concordance._offsets._indices[text].iterkeys():
            positions = concordance.offsets(text, primary)
            hits = [word_list[position] for position in positions]
            verdicts = [
                hit.str == text and hit.syntactic_category == primary
                for hit in hits
            ]
            assert_that(all(verdicts))

    # --- keyed by word text, syntactic and secondary syntactic category -----
    for text in concordance._offsets._indices.iterkeys():
        for primary in concordance._offsets._indices[text].iterkeys():
            for secondary in concordance._offsets._indices[text][primary].iterkeys():
                positions = concordance.offsets(text, primary, secondary)
                hits = [word_list[position] for position in positions]
                verdicts = [
                    hit.str == text
                    and hit.syntactic_category == primary
                    and hit.secondary_syntactic_category == secondary
                    for hit in hits
                ]
                assert_that(all(verdicts))
def test_should_find_complete_word_concordance(self):
    """Look up complete words in the index and compare the offsets returned.

    The index is built from ``self.word_list`` (a fixture set up outside this
    method, so the expected offsets below depend on its contents).  Each row
    pairs the positional arguments passed to ``offsets`` with the exact list
    of offsets expected back; rows are checked in the order listed.
    """
    idx = CompleteWordConcordanceIndex(self.word_list)

    expectations = [
        # (arguments for idx.offsets, expected offsets)
        ((u'something',), []),
        ((u"o",), [0, 1, 2]),
        ((u"o", SyntacticCategory.PRONOUN), [0, 1]),
        ((u"o", SyntacticCategory.DETERMINER), [2]),
        ((u"o", SyntacticCategory.PRONOUN,
          SecondarySyntacticCategory.PERSONAL), [0]),
        ((u"o", SyntacticCategory.PRONOUN,
          SecondarySyntacticCategory.DEMONSTRATIVE), [1]),
        ((u"onu", SyntacticCategory.PRONOUN,
          SecondarySyntacticCategory.PERSONAL), [3]),
        ((u"onu", SyntacticCategory.PRONOUN,
          SecondarySyntacticCategory.DEMONSTRATIVE), [4]),
        ((u"gittim",), [6]),
        ((u"gittim", SyntacticCategory.VERB), [6]),
        ((u"giderim",), [7]),
        ((u"giderim", SyntacticCategory.VERB), [7]),
        ((u"gidecekler",), [8, 10]),
        ((u"gidecekler", SyntacticCategory.VERB), [8]),
        ((u"gidecekler", SyntacticCategory.NOUN), [10]),
        ((u"gideceğim",), [9, 11]),
        ((u"gideceğim", SyntacticCategory.VERB), [9]),
        ((u"gideceğim", SyntacticCategory.NOUN), [11]),
    ]

    for query, expected in expectations:
        assert_that(idx.offsets(*query), equal_to(expected))
def test_should_find_complete_word_concordance(self):
    """Verify the offsets reported for a series of complete-word queries.

    Builds the index from ``self.word_list`` (fixture defined outside this
    method) and checks, query by query, that ``offsets`` returns exactly the
    expected list.  Queries cover an unknown word, word-only lookups, and
    lookups narrowed by syntactic and secondary syntactic category.
    """
    index = CompleteWordConcordanceIndex(self.word_list)

    # Short aliases keep the query lines below readable.
    pronoun = SyntacticCategory.PRONOUN
    determiner = SyntacticCategory.DETERMINER
    verb = SyntacticCategory.VERB
    noun = SyntacticCategory.NOUN
    personal = SecondarySyntacticCategory.PERSONAL
    demonstrative = SecondarySyntacticCategory.DEMONSTRATIVE

    def check(expected, *query):
        # One concordance lookup compared against its exact expected offsets.
        assert_that(index.offsets(*query), equal_to(expected))

    # A word that the expectation says yields no offsets at all.
    check([], u'something')

    # "o": word only, then narrowed by category and secondary category.
    check([0, 1, 2], u"o")
    check([0, 1], u"o", pronoun)
    check([2], u"o", determiner)
    check([0], u"o", pronoun, personal)
    check([1], u"o", pronoun, demonstrative)

    # "onu": fully qualified lookups only.
    check([3], u"onu", pronoun, personal)
    check([4], u"onu", pronoun, demonstrative)

    # Words expected at a single offset.
    check([6], u"gittim")
    check([6], u"gittim", verb)
    check([7], u"giderim")
    check([7], u"giderim", verb)

    # Words expected at two offsets, split by syntactic category.
    check([8, 10], u"gidecekler")
    check([8], u"gidecekler", verb)
    check([10], u"gidecekler", noun)
    check([9, 11], u"gideceğim")
    check([9], u"gideceğim", verb)
    check([11], u"gideceğim", noun)