    @classmethod
    def create_calculator(cls, parseset_index):
        mongodb_connection = pymongo.Connection(host='127.0.0.1')
        collection_map = {
            1: mongodb_connection['trnltk']['wordUnigrams{}'.format(parseset_index)],
            2: mongodb_connection['trnltk']['wordBigrams{}'.format(parseset_index)],
            3: mongodb_connection['trnltk']['wordTrigrams{}'.format(parseset_index)]
        }

        query_cache_collection = QueryCacheCollectionCreator(
            mongodb_connection['trnltk']).build(drop=False)

        database_index_builder = DatabaseIndexBuilder(collection_map)
        target_form_given_context_counter = CachingTargetFormGivenContextCounter(
            collection_map, query_cache_collection)
        ngram_frequency_smoother = CachedSimpleGoodTuringNGramFrequencySmoother()
        sequence_likelihood_calculator = UniformSequenceLikelihoodCalculator()

        return ContextParsingLikelihoodCalculator(
            database_index_builder, target_form_given_context_counter,
            ngram_frequency_smoother, sequence_likelihood_calculator)
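The snippet above wires a MongoDB-backed n-gram lookup: one collection per n-gram order, keyed by 1, 2 and 3. Below is a minimal sketch of that collection-map pattern on its own, using the current pymongo.MongoClient API instead of the legacy pymongo.Connection and assuming a local MongoDB with the trnltk database; the parse-set index 999 is purely illustrative.

import pymongo

# Map n-gram order -> MongoDB collection, following the naming scheme used in
# the examples (wordUnigrams<index>, wordBigrams<index>, wordTrigrams<index>).
connection = pymongo.MongoClient(host='127.0.0.1')
database = connection['trnltk']

parseset_index = '999'  # illustrative parse-set index
collection_map = {
    n: database['word{}s{}'.format(name, parseset_index)]
    for n, name in ((1, 'Unigram'), (2, 'Bigram'), (3, 'Trigram'))
}

# Fetch the collection holding bigrams:
print(collection_map[2].full_name)  # prints "trnltk.wordBigrams999"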
Example 2
    @classmethod
    def setUpClass(cls):
        super(InterpolatingLikelihoodCalculatorCalculationContextTest,
              cls).setUpClass()
        all_roots = []

        lexemes = LexiconLoader.load_from_file(
            os.path.join(os.path.dirname(__file__),
                         '../../../../../resources/master_dictionary.txt'))
        for di in lexemes:
            all_roots.extend(RootGenerator.generate(di))

        root_map_generator = RootMapGenerator()
        cls.root_map = root_map_generator.generate(all_roots)

        suffix_graph = CopulaSuffixGraph(
            NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
        suffix_graph.initialize()
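        # The nested constructors above compose the suffix graph by decoration;
        # read inside-out it is equivalent to the steps below (class names are
        # from the example itself, the per-layer notes are inferred from the
        # names only):
        #
        #   graph = BasicSuffixGraph()                # core suffix transitions
        #   graph = ProperNounSuffixGraph(graph)      # adds proper-noun suffixes
        #   graph = NumeralSuffixGraph(graph)         # adds numeral suffixes
        #   graph = CopulaSuffixGraph(graph)          # adds copula suffixes
        #   graph.initialize()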

        predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
        predefined_paths.create_predefined_paths()

        word_root_finder = WordRootFinder(cls.root_map)
        digit_numeral_root_finder = DigitNumeralRootFinder()
        text_numeral_root_finder = TextNumeralRootFinder(cls.root_map)
        proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
        proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

        cls.contextless_parser = UpperCaseSupportingContextlessMorphologicalParser(
            suffix_graph, predefined_paths, [
                word_root_finder, digit_numeral_root_finder,
                text_numeral_root_finder,
                proper_noun_from_apostrophe_root_finder,
                proper_noun_without_apostrophe_root_finder
            ])

        mongodb_connection = pymongo.Connection(host='127.0.0.1')
        cls.collection_map = {
            1: mongodb_connection['trnltk']['wordUnigrams999'],
            2: mongodb_connection['trnltk']['wordBigrams999'],
            3: mongodb_connection['trnltk']['wordTrigrams999']
        }

        database_index_builder = DatabaseIndexBuilder(cls.collection_map)
        target_form_given_context_counter = InMemoryCachingTargetFormGivenContextCounter(
            cls.collection_map)
        ngram_frequency_smoother = CachedSimpleGoodTuringNGramFrequencySmoother()
        sequence_likelihood_calculator = UniformSequenceLikelihoodCalculator()

        wrapped_generator = ContextParsingLikelihoodCalculator(
            database_index_builder, target_form_given_context_counter,
            ngram_frequency_smoother, sequence_likelihood_calculator)

        cls.generator = InterpolatingLikelihoodCalculator(wrapped_generator)

    def test_create_context_parsing_appender_index_for_unigram_collection(self):
        index_builder = DatabaseIndexBuilder(self.collection_map_for_N_1)

        index_builder.create_indexes(self.context_parsing_appender_matrix_ROW1)
        index_builder.create_indexes(self.context_parsing_appender_matrix_ROW2)
        index_builder.create_indexes(self.context_parsing_appender_matrix_ROW3)
Example 4
    @classmethod
    def setUpClass(cls):
        super(ContextParsingLikelihoodCalculatorTest, cls).setUpClass()

        database_index_builder = DatabaseIndexBuilder(cls.collection_map)
        target_form_given_context_counter = TargetFormGivenContextCounter(
            cls.collection_map)
        ngram_frequency_smoother = CachedSimpleGoodTuringNGramFrequencySmoother()
        sequence_likelihood_calculator = UniformSequenceLikelihoodCalculator()

        cls.generator = ContextParsingLikelihoodCalculator(
            database_index_builder, target_form_given_context_counter,
            ngram_frequency_smoother, sequence_likelihood_calculator)

    def setUp(self):
        super(ContextParsingLikelihoodCalculatorCachingTest, self).setUp()

        database = self.mongodb_connection['trnltk']

        query_cache_collection = QueryCacheCollectionCreator(database).build(
            drop=True)

        database_index_builder = DatabaseIndexBuilder(self.collection_map)
        target_form_given_context_counter = CachingTargetFormGivenContextCounter(
            self.collection_map, query_cache_collection)
        ngram_frequency_smoother = CachedSimpleGoodTuringNGramFrequencySmoother()
        sequence_likelihood_calculator = UniformSequenceLikelihoodCalculator()

        self.generator = ContextParsingLikelihoodCalculator(
            database_index_builder, target_form_given_context_counter,
            ngram_frequency_smoother, sequence_likelihood_calculator)

    def test_create_noncontext_parsing_appender_index_for_trigram_collection(self):
        index_builder = DatabaseIndexBuilder(self.collection_map_for_N_3)

        index_builder.create_indexes(self.non_context_parsing_appender_matrix)

    def test_create_context_parsing_appender_index_for_trigram_collection(self):
        index_builder = DatabaseIndexBuilder(self.collection_map_for_N_3)

        index_builder.create_indexes(self.context_parsing_appender_matrix_ROW1)
        index_builder.create_indexes(self.context_parsing_appender_matrix_ROW2)
        index_builder.create_indexes(self.context_parsing_appender_matrix_ROW3)

    def test_create_noncontext_parsing_appender_index_for_bigram_collection(self):
        index_builder = DatabaseIndexBuilder(self.collection_map_for_N_2)

        index_builder.create_indexes(self.non_context_parsing_appender_matrix)

    def build_indexes(self):
        index_builder = DatabaseIndexBuilder(self._collection_map)

        index_builder.create_indexes([(_context_word_appender,)])
        index_builder.create_indexes(self.APPENDER_MATRIX)
Example 10
    def build_indexes(self):
        index_builder = DatabaseIndexBuilder(self._collection_map)

        index_builder.create_indexes([(_context_word_appender, )])
        index_builder.create_indexes(self.APPENDER_MATRIX)
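The appender matrices and _context_word_appender passed to create_indexes above come from the enclosing classes and are not shown in these excerpts. As a hedged illustration of the effect such an index builder presumably has, this is how a compound index over two context fields would be created on one of the n-gram collections with plain pymongo; the field names below are placeholders, not trnltk's actual document schema.

import pymongo

connection = pymongo.MongoClient(host='127.0.0.1')
trigram_collection = connection['trnltk']['wordTrigrams999']

# Ascending compound index over two placeholder context fields; MongoDB
# leaves the collection unchanged if an identical index already exists.
trigram_collection.create_index([
    ('item_1.word.surface', pymongo.ASCENDING),
    ('item_2.word.surface', pymongo.ASCENDING),
])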