예제 #1
0
    def create_indexes(self, appender_matrix):
        """Create indexes on the n-gram collections for each appender combination.

        Each entry of ``appender_matrix`` is either a pair
        ``(target_appender, context_appender)`` or a one-element sequence
        ``(context_appender,)``; in the latter case no target appender is used.

        For every key ``n`` of ``self._collection_map``, visited in ascending
        order, the work is delegated to ``self._create_container_and_index``
        for the ``n`` collection with both appenders. When ``n > 1`` it is
        first invoked for the ``n - 1`` collection with the context appender
        only.

        The lookup of ``n - 1`` is not guarded: the map is expected to hold an
        entry for ``n - 1`` whenever it holds one for ``n > 1``.

        :param appender_matrix: iterable of one- or two-element appender
            sequences.
        """
        # The key order is identical for every appender combination, so sort
        # once instead of re-sorting the keys on each inner iteration.
        ordered_sizes = sorted(self._collection_map.keys())

        for appender_tuple in appender_matrix:
            if len(appender_tuple) > 1:
                target_appender, context_appender = appender_tuple
            else:
                target_appender, context_appender = None, appender_tuple[0]

            for n in ordered_sizes:
                collection = self._collection_map[n]

                if n > 1:
                    # Context-only pass over the next smaller collection.
                    smaller_collection = self._collection_map[n - 1]
                    smaller_index_container = WordNGramQueryContainer(n - 1)
                    self._create_container_and_index(
                        smaller_collection, smaller_index_container,
                        n - 1, None, context_appender)

                index_container = WordNGramQueryContainer(n)
                self._create_container_and_index(
                    collection, index_container,
                    n, target_appender, context_appender)
    def _count_target_form_given_context(self, target, context,
                                         target_comes_after, target_appender,
                                         context_appender):
        """Assemble a query for ``target`` plus ``context`` and return its count.

        The query container is sized ``len(context) + 1`` when a (truthy)
        ``target_appender`` is given and ``len(context)`` otherwise. The
        target appender, if any, contributes first; the context appender is
        then applied to every context item in order.

        :param target: item handed to ``target_appender.append``.
        :param context: sized iterable of items handed to
            ``context_appender.append``.
        :param target_comes_after: forwarded unchanged to
            ``self._find_count_for_query``.
        :param target_appender: appender for the target, or a falsy value to
            query on the context alone.
        :param context_appender: appender applied to each context item.
        :return: the result of ``self._find_count_for_query``.
        """
        ngram_size = len(context)
        if target_appender:
            # The target occupies one extra slot in the n-gram.
            ngram_size += 1

        query_container = WordNGramQueryContainer(ngram_size)
        params = []

        if target_appender:
            target_appender.append(target, query_container, params)

        for item in context:
            context_appender.append(item, query_container, params)

        return self._find_count_for_query(
            params, query_container, target_comes_after)
예제 #3
0
    def test_create_context_keys_without_given(self):
        """Check keys and collection for unigram queries that only have a target.

        Every combination of the ``target_surface`` argument and the boolean
        flag passed to ``create_context`` is exercised once. The expected keys
        gain the ``syntactic_category`` entry only when ``target_surface`` is
        called with ``True``; the ``create_context`` flag does not change the
        expectation here.
        """
        value_key = "item_0.word.surface.value"
        category_key = "item_0.word.surface.syntactic_category"

        # (target_surface argument, create_context flag, expected keys)
        cases = [
            (False, False, [value_key]),
            (False, True, [value_key]),
            (True, False, [value_key, category_key]),
            (True, True, [value_key, category_key]),
        ]

        for surface_arg, context_flag, expected_keys in cases:
            query_container = WordNGramQueryContainer(1).target_surface(surface_arg)
            query_execution_context = QueryExecutionContextBuilder(
                self.collection_map).create_context(query_container, context_flag)

            assert_that(query_execution_context.keys, equal_to(expected_keys))
            assert_that(query_execution_context.collection,
                        is_(self.unigram_collection))
예제 #4
0
    def test_create_context_keys_and_index_with_target_and_given(self):
        """Check keys, collection and index name for target-plus-given queries.

        Bigram, trigram and fourgram containers are run through
        ``QueryExecutionIndexContextBuilder.create_context`` with the boolean
        flag both ``False`` and ``True``. In the expectations below, a
        ``True`` flag puts the target on the last item index and lists it
        first.
        """
        def check(collection, query_container, context_flag,
                  expected_keys, expected_index_name):
            # Build one context and verify the three attributes under test.
            context = QueryExecutionIndexContextBuilder().create_context(
                collection, query_container, context_flag)
            assert_that(context.keys, equal_to(expected_keys))
            assert_that(context.collection, is_(collection))
            assert_that(context.index_name, equal_to(expected_index_name))

        # --- bigrams ---
        check(self.bigram_collection,
              WordNGramQueryContainer(2).target_surface(False).given_surface(False),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value'],
              "word2GramIdx_0_surface_1_surface")

        check(self.bigram_collection,
              WordNGramQueryContainer(2).target_surface(True).given_surface(False),
              True,
              ['item_1.word.surface.value',
               'item_1.word.surface.syntactic_category',
               'item_0.word.surface.value'],
              "word2GramIdx_1_surface_cat_0_surface")

        check(self.bigram_collection,
              WordNGramQueryContainer(2).target_surface(False).given_surface(True),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category'],
              "word2GramIdx_0_surface_1_surface_cat")

        check(self.bigram_collection,
              WordNGramQueryContainer(2).target_surface(False).given_surface(True),
              True,
              ['item_1.word.surface.value',
               'item_0.word.surface.value',
               'item_0.word.surface.syntactic_category'],
              "word2GramIdx_1_surface_0_surface_cat")

        # --- trigrams ---
        check(self.trigram_collection,
              WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(False),
              False,
              ['item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.surface.value',
               'item_2.word.stem.value'],
              "word3GramIdx_0_surface_cat_1_surface_2_stem")

        check(self.trigram_collection,
              WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(True),
              True,
              ['item_2.word.surface.value',
               'item_2.word.surface.syntactic_category',
               'item_0.word.surface.value',
               'item_1.word.stem.value',
               'item_1.word.stem.syntactic_category'],
              "word3GramIdx_2_surface_cat_0_surface_1_stem_cat")

        check(self.trigram_collection,
              WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category',
               'item_2.word.stem.value'],
              "word3GramIdx_0_surface_1_surface_cat_2_stem")

        check(self.trigram_collection,
              WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
              True,
              ['item_2.word.surface.value',
               'item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.stem.value'],
              "word3GramIdx_2_surface_0_surface_cat_1_stem")

        # --- fourgrams ---
        check(self.fourgram_collection,
              WordNGramQueryContainer(4).target_surface(False).given_surface(True).given_stem(False).given_lemma_root(False),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category',
               'item_2.word.stem.value',
               'item_3.word.lemma_root.value'],
              "word4GramIdx_0_surface_1_surface_cat_2_stem_3_lemma_root")

        check(self.fourgram_collection,
              WordNGramQueryContainer(4).target_surface(True).given_surface(True).given_stem(False).given_lemma_root(True),
              True,
              ['item_3.word.surface.value',
               'item_3.word.surface.syntactic_category',
               'item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.stem.value',
               'item_2.word.lemma_root.value',
               'item_2.word.lemma_root.syntactic_category'],
              "word4GramIdx_3_surface_cat_0_surface_cat_1_stem_2_lemma_root_cat")
예제 #5
0
    def test_add_criterion(self):
        """Check the string forms of ``target_item`` and ``given_items``.

        Covers construction with a non-positive size (expected to raise
        ``AssertionError``), ``target_surface`` called with no argument,
        ``False`` and ``True``, and chains of ``given_stem`` calls on
        containers of size 1, 2 and 3. Each distinct case is asserted once.
        """
        def expect_target(container, expected):
            assert_that(str(container.target_item), equal_to(expected))

        def expect_given(container, expected):
            assert_that(str(container.given_items), equal_to(expected))

        for invalid_size in (-1, 0):
            self.assertRaises(AssertionError, WordNGramQueryContainer, invalid_size)

        # Target item: the argument-less call behaves like target_surface(False).
        for size in (1, 2):
            expect_target(WordNGramQueryContainer(size).target_surface(), "(surface)")
            expect_target(WordNGramQueryContainer(size).target_surface(False), "(surface)")
            expect_target(WordNGramQueryContainer(size).target_surface(True), "(surface, syntactic_category)")

        # Adding a given item must leave the target item untouched.
        expect_target(WordNGramQueryContainer(2).target_surface().given_stem(), "(surface)")
        expect_target(WordNGramQueryContainer(2).target_surface(False).given_stem(False), "(surface)")
        expect_target(WordNGramQueryContainer(2).target_surface(False).given_stem(True), "(surface)")
        expect_target(WordNGramQueryContainer(2).target_surface(True).given_stem(True), "(surface, syntactic_category)")

        # Given items on a bigram container.
        expect_given(WordNGramQueryContainer(2).target_surface(), "[]")
        expect_given(WordNGramQueryContainer(2).target_surface(False), "[]")
        expect_given(WordNGramQueryContainer(2).target_surface(True), "[]")

        expect_given(WordNGramQueryContainer(2).target_surface().given_stem(), "[(stem)]")
        expect_given(WordNGramQueryContainer(2).target_surface(False).given_stem(False), "[(stem)]")
        expect_given(WordNGramQueryContainer(2).target_surface(False).given_stem(True), "[(stem, syntactic_category)]")
        expect_given(WordNGramQueryContainer(2).target_surface(True).given_stem(True), "[(stem, syntactic_category)]")

        # Given items on a trigram container.
        expect_given(WordNGramQueryContainer(3).target_surface(), "[]")
        expect_given(WordNGramQueryContainer(3).target_surface(False), "[]")
        expect_given(WordNGramQueryContainer(3).target_surface(True), "[]")

        expect_given(WordNGramQueryContainer(3).target_surface().given_stem().given_stem(), "[(stem), (stem)]")
        expect_given(WordNGramQueryContainer(3).target_surface(False).given_stem(True).given_stem(), "[(stem, syntactic_category), (stem)]")
        expect_given(WordNGramQueryContainer(3).target_surface(False).given_stem(True).given_stem(True), "[(stem, syntactic_category), (stem, syntactic_category)]")
        expect_given(WordNGramQueryContainer(3).target_surface(True).given_stem(False).given_stem(True), "[(stem), (stem, syntactic_category)]")
예제 #6
0
    def test_create_context_keys_and_index_without_target(self):
        """Check keys, collection and index name for queries with given items only.

        Unigram and bigram containers built solely from ``given_surface``
        calls are run through
        ``QueryExecutionIndexContextBuilder.create_context``. In every case
        below the expected keys follow the item order 0, 1 regardless of the
        boolean flag.
        """
        def check(collection, query_container, context_flag,
                  expected_keys, expected_index_name):
            # Build one context and verify the three attributes under test.
            context = QueryExecutionIndexContextBuilder().create_context(
                collection, query_container, context_flag)
            assert_that(context.keys, equal_to(expected_keys))
            assert_that(context.collection, is_(collection))
            assert_that(context.index_name, equal_to(expected_index_name))

        # --- unigrams: same expectation for both flag values ---
        for context_flag in (False, True):
            check(self.unigram_collection,
                  WordNGramQueryContainer(1).given_surface(False),
                  context_flag,
                  ["item_0.word.surface.value"],
                  "word1GramIdx_0_surface")

        # --- bigrams ---
        check(self.bigram_collection,
              WordNGramQueryContainer(2).given_surface(True).given_surface(True),
              False,
              ['item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category'],
              "word2GramIdx_0_surface_cat_1_surface_cat")

        check(self.bigram_collection,
              WordNGramQueryContainer(2).given_surface(True).given_surface(False),
              False,
              ['item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.surface.value'],
              "word2GramIdx_0_surface_cat_1_surface")

        check(self.bigram_collection,
              WordNGramQueryContainer(2).given_surface(False).given_surface(True),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category'],
              "word2GramIdx_0_surface_1_surface_cat")

        check(self.bigram_collection,
              WordNGramQueryContainer(2).given_surface(False).given_surface(False),
              True,
              ['item_0.word.surface.value',
               'item_1.word.surface.value'],
              "word2GramIdx_0_surface_1_surface")
예제 #7
0
    def test_create_context_keys_with_target_and_given(self):
        """Check keys and selected collection for target-plus-given queries.

        Bigram, trigram and fourgram containers are run through
        ``QueryExecutionContextBuilder(self.collection_map).create_context``
        with the boolean flag both ``False`` and ``True``. In the expectations
        below, a ``True`` flag puts the target on the last item index and
        lists it first.
        """
        def check(query_container, context_flag, expected_keys, expected_collection):
            # Build one context and verify its keys and chosen collection.
            context = QueryExecutionContextBuilder(
                self.collection_map).create_context(query_container, context_flag)
            assert_that(context.keys, equal_to(expected_keys))
            assert_that(context.collection, is_(expected_collection))

        # --- bigrams ---
        check(WordNGramQueryContainer(2).target_surface(False).given_surface(False),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value'],
              self.bigram_collection)

        check(WordNGramQueryContainer(2).target_surface(True).given_surface(False),
              True,
              ['item_1.word.surface.value',
               'item_1.word.surface.syntactic_category',
               'item_0.word.surface.value'],
              self.bigram_collection)

        check(WordNGramQueryContainer(2).target_surface(False).given_surface(True),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category'],
              self.bigram_collection)

        check(WordNGramQueryContainer(2).target_surface(False).given_surface(True),
              True,
              ['item_1.word.surface.value',
               'item_0.word.surface.value',
               'item_0.word.surface.syntactic_category'],
              self.bigram_collection)

        # --- trigrams ---
        check(WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(False),
              False,
              ['item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.surface.value',
               'item_2.word.stem.value'],
              self.trigram_collection)

        check(WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(True),
              True,
              ['item_2.word.surface.value',
               'item_2.word.surface.syntactic_category',
               'item_0.word.surface.value',
               'item_1.word.stem.value',
               'item_1.word.stem.syntactic_category'],
              self.trigram_collection)

        check(WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category',
               'item_2.word.stem.value'],
              self.trigram_collection)

        check(WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
              True,
              ['item_2.word.surface.value',
               'item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.stem.value'],
              self.trigram_collection)

        # --- fourgrams ---
        check(WordNGramQueryContainer(4).target_surface(False).given_surface(True).given_stem(False).given_lemma_root(False),
              False,
              ['item_0.word.surface.value',
               'item_1.word.surface.value',
               'item_1.word.surface.syntactic_category',
               'item_2.word.stem.value',
               'item_3.word.lemma_root.value'],
              self.fourgram_collection)

        check(WordNGramQueryContainer(4).target_surface(True).given_surface(True).given_stem(False).given_lemma_root(True),
              True,
              ['item_3.word.surface.value',
               'item_3.word.surface.syntactic_category',
               'item_0.word.surface.value',
               'item_0.word.surface.syntactic_category',
               'item_1.word.stem.value',
               'item_2.word.lemma_root.value',
               'item_2.word.lemma_root.syntactic_category'],
              self.fourgram_collection)