def test_should_find_root_concordance(self):
    """Verify the offsets ``RootConcordanceIndex`` reports for fixed queries.

    The index is built from ``self.word_list``. Each query is a root string,
    optionally narrowed by a syntactic category and then by a secondary
    syntactic category, and must yield exactly the listed offsets.
    """
    idx = RootConcordanceIndex(self.word_list)

    # Each entry: (positional arguments for idx.offsets, offsets expected back).
    expectations = [
        ((u'something',), []),
        ((u"o",), [0, 1, 2, 3, 4]),
        ((u"o", SyntacticCategory.PRONOUN), [0, 1, 3, 4]),
        ((u"o", SyntacticCategory.DETERMINER), [2]),
        ((u"o", SyntacticCategory.PRONOUN,
          SecondarySyntacticCategory.PERSONAL), [0, 3]),
        ((u"o", SyntacticCategory.PRONOUN,
          SecondarySyntacticCategory.DEMONSTRATIVE), [1, 4]),
        ((u"git",), [6]),
        ((u"gid",), [7, 8, 9, 10, 11]),
        ((u"gid", SyntacticCategory.VERB), [7, 8, 9, 10, 11]),
    ]

    # Queries run in the order listed above.
    for query, expected_offsets in expectations:
        assert_that(idx.offsets(*query), equal_to(expected_offsets))
def _validate_root_concordance_indexes(self, word_list):
    """Check that every offset reported by the index points at a matching word.

    Builds a ``RootConcordanceIndex`` from ``word_list`` and walks the keys of
    its internal ``_offsets._indices`` mapping in three passes: by root string,
    by root string + syntactic category, and by root string + syntactic
    category + secondary syntactic category. For every key combination, the
    offsets returned by ``idx.offsets`` are resolved back into ``word_list``
    and each resolved word's root must carry the queried values.

    :param word_list: indexable sequence of words; each word is expected to
        expose ``root.str``, ``root.syntactic_category`` and
        ``root.secondary_syntactic_category``.
    """
    idx = RootConcordanceIndex(word_list)

    def words_at(*query):
        # Resolve the offsets returned for ``query`` back into the words.
        return [word_list[position] for position in idx.offsets(*query)]

    # Pass 1: root string only.
    for root_str in idx._offsets._indices.iterkeys():
        found = words_at(root_str)
        assert_that(all([w.root.str == root_str for w in found]))

    # Pass 2: root string narrowed by syntactic category.
    for root_str in idx._offsets._indices.iterkeys():
        for syntactic_category in idx._offsets._indices[root_str].iterkeys():
            found = words_at(root_str, syntactic_category)
            assert_that(all([
                w.root.str == root_str
                and w.root.syntactic_category == syntactic_category
                for w in found
            ]))

    # Pass 3: additionally narrowed by secondary syntactic category.
    for root_str in idx._offsets._indices.iterkeys():
        for syntactic_category in idx._offsets._indices[root_str].iterkeys():
            secondary_level = idx._offsets._indices[root_str][syntactic_category]
            for secondary_syntactic_category in secondary_level.iterkeys():
                found = words_at(root_str, syntactic_category,
                                 secondary_syntactic_category)
                assert_that(all([
                    w.root.str == root_str
                    and w.root.syntactic_category == syntactic_category
                    and w.root.secondary_syntactic_category == secondary_syntactic_category
                    for w in found
                ]))
def _validate_root_concordance_indexes(self, word_list):
    """Assert that the root concordance index only returns matching words.

    A ``RootConcordanceIndex`` is created for ``word_list``. The keys stored
    in its ``_offsets._indices`` mapping are then queried back through
    ``idx.offsets`` at three levels of detail (root string; root string and
    syntactic category; root string, syntactic category and secondary
    syntactic category). Each returned offset is looked up in ``word_list``
    and the word found there must have a root equal to the queried values.

    :param word_list: indexable sequence of words whose ``root`` exposes
        ``str``, ``syntactic_category`` and ``secondary_syntactic_category``.
    """
    idx = RootConcordanceIndex(word_list)

    # Level 1: every word found for a root string has that root string.
    for root_str in idx._offsets._indices.iterkeys():
        hits = [word_list[offset] for offset in idx.offsets(root_str)]
        root_checks = [hit.root.str == root_str for hit in hits]
        assert_that(all(root_checks))

    # Level 2: root string and syntactic category must both match.
    for root_str in idx._offsets._indices.iterkeys():
        categories = idx._offsets._indices[root_str]
        for syntactic_category in categories.iterkeys():
            hits = [word_list[offset]
                    for offset in idx.offsets(root_str, syntactic_category)]
            category_checks = [
                hit.root.str == root_str
                and hit.root.syntactic_category == syntactic_category
                for hit in hits
            ]
            assert_that(all(category_checks))

    # Level 3: the secondary syntactic category must match as well.
    for root_str in idx._offsets._indices.iterkeys():
        categories = idx._offsets._indices[root_str]
        for syntactic_category in categories.iterkeys():
            secondaries = idx._offsets._indices[root_str][syntactic_category]
            for secondary_syntactic_category in secondaries.iterkeys():
                offsets = idx.offsets(root_str, syntactic_category,
                                      secondary_syntactic_category)
                hits = [word_list[offset] for offset in offsets]
                secondary_checks = [
                    hit.root.str == root_str
                    and hit.root.syntactic_category == syntactic_category
                    and hit.root.secondary_syntactic_category == secondary_syntactic_category
                    for hit in hits
                ]
                assert_that(all(secondary_checks))
def test_should_find_root_concordance(self):
    """Check ``RootConcordanceIndex.offsets`` against known expected offsets.

    Builds the index from ``self.word_list`` and queries it by root string
    alone, by root string plus syntactic category, and by root string plus
    syntactic category plus secondary syntactic category.
    """
    idx = RootConcordanceIndex(self.word_list)

    # This root is expected to produce no offsets at all.
    missing = idx.offsets(u'something')
    assert_that(missing, equal_to([]))

    # Root u"o": all occurrences, then split by syntactic category.
    all_o = idx.offsets(u"o")
    assert_that(all_o, equal_to([0, 1, 2, 3, 4]))

    pronoun_o = idx.offsets(u"o", SyntacticCategory.PRONOUN)
    assert_that(pronoun_o, equal_to([0, 1, 3, 4]))

    determiner_o = idx.offsets(u"o", SyntacticCategory.DETERMINER)
    assert_that(determiner_o, equal_to([2]))

    # Root u"o" as a pronoun, split further by secondary syntactic category.
    personal_o = idx.offsets(u"o", SyntacticCategory.PRONOUN,
                             SecondarySyntacticCategory.PERSONAL)
    assert_that(personal_o, equal_to([0, 3]))

    demonstrative_o = idx.offsets(u"o", SyntacticCategory.PRONOUN,
                                  SecondarySyntacticCategory.DEMONSTRATIVE)
    assert_that(demonstrative_o, equal_to([1, 4]))

    # Roots u"git" and u"gid" are queried as separate keys.
    git_offsets = idx.offsets(u"git")
    assert_that(git_offsets, equal_to([6]))

    gid_offsets = idx.offsets(u"gid")
    assert_that(gid_offsets, equal_to([7, 8, 9, 10, 11]))

    gid_verb_offsets = idx.offsets(u"gid", SyntacticCategory.VERB)
    assert_that(gid_verb_offsets, equal_to([7, 8, 9, 10, 11]))