def create_indexes(self, appender_matrix):
    """Create index containers for every appender combination and collection.

    ``appender_matrix`` is an iterable of sequences. A sequence with more
    than one item is unpacked as ``(target_appender, context_appender)``;
    a single-item sequence supplies only the context appender and the
    target appender is ``None``.

    For every key ``n`` of ``self._collection_map`` (visited in ascending
    order) the work is delegated to ``self._create_container_and_index``
    with a fresh ``WordNGramQueryContainer(n)``. When ``n > 1`` the same
    helper is first called for the ``n - 1`` collection, without a target
    appender.

    Raises ``KeyError`` if ``n - 1`` is missing from the map for some
    ``n > 1``, and ``ValueError`` if an appender sequence has more than
    two items.
    """
    # The key order does not depend on the appenders: sort once instead of
    # re-sorting the whole key list on every inner-loop iteration.
    ngram_sizes = sorted(self._collection_map.keys())

    for appender_tuple in appender_matrix:
        if len(appender_tuple) > 1:
            target_appender, context_appender = appender_tuple
        else:
            target_appender, context_appender = None, appender_tuple[0]

        for n in ngram_sizes:
            collection = self._collection_map[n]

            if n > 1:
                # The (n - 1) collection gets its own container and index,
                # built from the context appender only.
                smaller_collection = self._collection_map[n - 1]
                smaller_index_container = WordNGramQueryContainer(n - 1)
                self._create_container_and_index(
                    smaller_collection, smaller_index_container, n - 1, None, context_appender)

            index_container = WordNGramQueryContainer(n)
            self._create_container_and_index(
                collection, index_container, n, target_appender, context_appender)
def _count_target_form_given_context(self, target, context, target_comes_after, target_appender, context_appender):
    """Build a query from the target and context items and return its count.

    The query container is sized ``len(context) + 1`` when a target appender
    is supplied and ``len(context)`` otherwise. The target appender (if any)
    is applied first, then the context appender is applied to each context
    item in order; both receive the shared ``params`` list. The result of
    ``self._find_count_for_query`` is returned unchanged.
    """
    # One extra slot is reserved for the target only when it takes part in the query.
    container_size = len(context) + 1 if target_appender else len(context)
    query_container = WordNGramQueryContainer(container_size)

    params = []

    if target_appender:
        target_appender.append(target, query_container, params)

    for given_item in context:
        context_appender.append(given_item, query_container, params)

    return self._find_count_for_query(params, query_container, target_comes_after)
def test_create_context_keys_without_given(self):
    """Check keys and collection of contexts built for target-only unigram queries."""
    surface_only = ["item_0.word.surface.value"]
    surface_with_category = ["item_0.word.surface.value", "item_0.word.surface.syntactic_category"]

    # (argument to target_surface, second argument to create_context, expected keys)
    scenarios = [
        (False, False, surface_only),
        (False, True, surface_only),
        (True, False, surface_with_category),
        (True, True, surface_with_category),
        # the first two scenarios are exercised a second time
        (False, False, surface_only),
        (False, True, surface_only),
    ]

    for target_flag, context_flag, expected_keys in scenarios:
        builder = QueryExecutionContextBuilder(self.collection_map)
        container = WordNGramQueryContainer(1).target_surface(target_flag)
        query_execution_context = builder.create_context(container, context_flag)

        assert_that(query_execution_context.keys, equal_to(expected_keys))
        assert_that(query_execution_context.collection, is_(self.unigram_collection))
def test_create_context_keys_and_index_with_target_and_given(self):
    """Check keys, collection and index name for queries with a target and given items."""

    def verify(context, expected_keys, expected_collection, expected_index_name):
        # The same three properties are checked for every scenario.
        assert_that(context.keys, equal_to(expected_keys))
        assert_that(context.collection, is_(expected_collection))
        assert_that(context.index_name, equal_to(expected_index_name))

    # --- bigrams ---
    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).target_surface(False).given_surface(False),
            False),
        ['item_0.word.surface.value', 'item_1.word.surface.value'],
        self.bigram_collection,
        "word2GramIdx_0_surface_1_surface")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).target_surface(True).given_surface(False),
            True),
        ['item_1.word.surface.value', 'item_1.word.surface.syntactic_category',
         'item_0.word.surface.value'],
        self.bigram_collection,
        "word2GramIdx_1_surface_cat_0_surface")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).target_surface(False).given_surface(True),
            False),
        ['item_0.word.surface.value',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category'],
        self.bigram_collection,
        "word2GramIdx_0_surface_1_surface_cat")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).target_surface(False).given_surface(True),
            True),
        ['item_1.word.surface.value',
         'item_0.word.surface.value', 'item_0.word.surface.syntactic_category'],
        self.bigram_collection,
        "word2GramIdx_1_surface_0_surface_cat")

    # --- trigrams ---
    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.trigram_collection,
            WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(False),
            False),
        ['item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.surface.value',
         'item_2.word.stem.value'],
        self.trigram_collection,
        "word3GramIdx_0_surface_cat_1_surface_2_stem")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.trigram_collection,
            WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(True),
            True),
        ['item_2.word.surface.value', 'item_2.word.surface.syntactic_category',
         'item_0.word.surface.value',
         'item_1.word.stem.value', 'item_1.word.stem.syntactic_category'],
        self.trigram_collection,
        "word3GramIdx_2_surface_cat_0_surface_1_stem_cat")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.trigram_collection,
            WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
            False),
        ['item_0.word.surface.value',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category',
         'item_2.word.stem.value'],
        self.trigram_collection,
        "word3GramIdx_0_surface_1_surface_cat_2_stem")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.trigram_collection,
            WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
            True),
        ['item_2.word.surface.value',
         'item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.stem.value'],
        self.trigram_collection,
        "word3GramIdx_2_surface_0_surface_cat_1_stem")

    # --- fourgrams ---
    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.fourgram_collection,
            WordNGramQueryContainer(4).target_surface(False).given_surface(True).given_stem(False).given_lemma_root(False),
            False),
        ['item_0.word.surface.value',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category',
         'item_2.word.stem.value',
         'item_3.word.lemma_root.value'],
        self.fourgram_collection,
        "word4GramIdx_0_surface_1_surface_cat_2_stem_3_lemma_root")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.fourgram_collection,
            WordNGramQueryContainer(4).target_surface(True).given_surface(True).given_stem(False).given_lemma_root(True),
            True),
        ['item_3.word.surface.value', 'item_3.word.surface.syntactic_category',
         'item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.stem.value',
         'item_2.word.lemma_root.value', 'item_2.word.lemma_root.syntactic_category'],
        self.fourgram_collection,
        "word4GramIdx_3_surface_cat_0_surface_cat_1_stem_2_lemma_root_cat")
def test_add_criterion(self):
    """Check the string forms of target_item and given_items after adding criteria."""

    def expect_target(container, expected_text):
        assert_that(str(container.target_item), equal_to(expected_text))

    def expect_given(container, expected_text):
        assert_that(str(container.given_items), equal_to(expected_text))

    # Constructing a container with a non-positive size must fail an assertion.
    for invalid_size in (-1, 0):
        self.assertRaises(AssertionError, WordNGramQueryContainer, invalid_size)

    # target item of a unigram container
    expect_target(WordNGramQueryContainer(1).target_surface(), "(surface)")
    expect_target(WordNGramQueryContainer(1).target_surface(False), "(surface)")
    expect_target(WordNGramQueryContainer(1).target_surface(True), "(surface, syntactic_category)")

    # target item of a bigram container, with no given item
    expect_target(WordNGramQueryContainer(2).target_surface(), "(surface)")
    expect_target(WordNGramQueryContainer(2).target_surface(False), "(surface)")
    expect_target(WordNGramQueryContainer(2).target_surface(True), "(surface, syntactic_category)")

    # target item of a bigram container, with a given stem
    expect_target(WordNGramQueryContainer(2).target_surface().given_stem(), "(surface)")
    expect_target(WordNGramQueryContainer(2).target_surface(False).given_stem(False), "(surface)")
    expect_target(WordNGramQueryContainer(2).target_surface(False).given_stem(True), "(surface)")
    expect_target(WordNGramQueryContainer(2).target_surface(True).given_stem(True), "(surface, syntactic_category)")

    # given items of a bigram container; this group of assertions is run twice in a row
    for _ in range(2):
        expect_given(WordNGramQueryContainer(2).target_surface(), "[]")
        expect_given(WordNGramQueryContainer(2).target_surface(False), "[]")
        expect_given(WordNGramQueryContainer(2).target_surface(True), "[]")

        expect_given(WordNGramQueryContainer(2).target_surface().given_stem(), "[(stem)]")
        expect_given(WordNGramQueryContainer(2).target_surface(False).given_stem(False), "[(stem)]")
        expect_given(WordNGramQueryContainer(2).target_surface(False).given_stem(True), "[(stem, syntactic_category)]")
        expect_given(WordNGramQueryContainer(2).target_surface(True).given_stem(True), "[(stem, syntactic_category)]")

    # given items of a trigram container
    expect_given(WordNGramQueryContainer(3).target_surface(), "[]")
    expect_given(WordNGramQueryContainer(3).target_surface(False), "[]")
    expect_given(WordNGramQueryContainer(3).target_surface(True), "[]")

    expect_given(WordNGramQueryContainer(3).target_surface().given_stem().given_stem(), "[(stem), (stem)]")
    expect_given(WordNGramQueryContainer(3).target_surface(False).given_stem(True).given_stem(), "[(stem, syntactic_category), (stem)]")
    expect_given(WordNGramQueryContainer(3).target_surface(False).given_stem(True).given_stem(True), "[(stem, syntactic_category), (stem, syntactic_category)]")
    expect_given(WordNGramQueryContainer(3).target_surface(True).given_stem(False).given_stem(True), "[(stem), (stem, syntactic_category)]")
def test_create_context_keys_and_index_without_target(self):
    """Check keys, collection and index name for queries that only have given items."""

    def verify(context, expected_keys, expected_collection, expected_index_name):
        # The same three properties are checked for every scenario.
        assert_that(context.keys, equal_to(expected_keys))
        assert_that(context.collection, is_(expected_collection))
        assert_that(context.index_name, equal_to(expected_index_name))

    # --- unigrams ---
    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.unigram_collection,
            WordNGramQueryContainer(1).given_surface(False),
            False),
        ["item_0.word.surface.value"],
        self.unigram_collection,
        "word1GramIdx_0_surface")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.unigram_collection,
            WordNGramQueryContainer(1).given_surface(False),
            True),
        ["item_0.word.surface.value"],
        self.unigram_collection,
        "word1GramIdx_0_surface")

    # --- bigrams ---
    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).given_surface(True).given_surface(True),
            False),
        ['item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category'],
        self.bigram_collection,
        "word2GramIdx_0_surface_cat_1_surface_cat")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).given_surface(True).given_surface(False),
            False),
        ['item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.surface.value'],
        self.bigram_collection,
        "word2GramIdx_0_surface_cat_1_surface")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).given_surface(False).given_surface(True),
            False),
        ['item_0.word.surface.value',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category'],
        self.bigram_collection,
        "word2GramIdx_0_surface_1_surface_cat")

    verify(
        QueryExecutionIndexContextBuilder().create_context(
            self.bigram_collection,
            WordNGramQueryContainer(2).given_surface(False).given_surface(False),
            True),
        ['item_0.word.surface.value', 'item_1.word.surface.value'],
        self.bigram_collection,
        "word2GramIdx_0_surface_1_surface")
def test_create_context_keys_with_target_and_given(self):
    """Check keys and selected collection for queries with a target and given items."""

    def verify(context, expected_keys, expected_collection):
        # Every scenario checks the generated keys and the chosen collection.
        assert_that(context.keys, equal_to(expected_keys))
        assert_that(context.collection, is_(expected_collection))

    # --- bigrams ---
    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(2).target_surface(False).given_surface(False),
            False),
        ['item_0.word.surface.value', 'item_1.word.surface.value'],
        self.bigram_collection)

    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(2).target_surface(True).given_surface(False),
            True),
        ['item_1.word.surface.value', 'item_1.word.surface.syntactic_category',
         'item_0.word.surface.value'],
        self.bigram_collection)

    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(2).target_surface(False).given_surface(True),
            False),
        ['item_0.word.surface.value',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category'],
        self.bigram_collection)

    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(2).target_surface(False).given_surface(True),
            True),
        ['item_1.word.surface.value',
         'item_0.word.surface.value', 'item_0.word.surface.syntactic_category'],
        self.bigram_collection)

    # --- trigrams ---
    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(False),
            False),
        ['item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.surface.value',
         'item_2.word.stem.value'],
        self.trigram_collection)

    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(3).target_surface(True).given_surface(False).given_stem(True),
            True),
        ['item_2.word.surface.value', 'item_2.word.surface.syntactic_category',
         'item_0.word.surface.value',
         'item_1.word.stem.value', 'item_1.word.stem.syntactic_category'],
        self.trigram_collection)

    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
            False),
        ['item_0.word.surface.value',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category',
         'item_2.word.stem.value'],
        self.trigram_collection)

    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(3).target_surface(False).given_surface(True).given_stem(False),
            True),
        ['item_2.word.surface.value',
         'item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.stem.value'],
        self.trigram_collection)

    # --- fourgrams ---
    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(4).target_surface(False).given_surface(True).given_stem(False).given_lemma_root(False),
            False),
        ['item_0.word.surface.value',
         'item_1.word.surface.value', 'item_1.word.surface.syntactic_category',
         'item_2.word.stem.value',
         'item_3.word.lemma_root.value'],
        self.fourgram_collection)

    verify(
        QueryExecutionContextBuilder(self.collection_map).create_context(
            WordNGramQueryContainer(4).target_surface(True).given_surface(True).given_stem(False).given_lemma_root(True),
            True),
        ['item_3.word.surface.value', 'item_3.word.surface.syntactic_category',
         'item_0.word.surface.value', 'item_0.word.surface.syntactic_category',
         'item_1.word.stem.value',
         'item_2.word.lemma_root.value', 'item_2.word.lemma_root.syntactic_category'],
        self.fourgram_collection)