コード例 #1
0
    def test_generate_likelihood_of_one_word_given_two_context_words(self):
        # Scenario: the surface u'erkek' with two context words on each side.
        # Every context word is a one-element list holding one mock container.
        new_builder = MockMorphemeContainerBuilder.builder

        first_leading = new_builder(None, u"gençten", "Noun").stem(u"genç", "Noun").lexeme(u"genç", "Adj").build()
        second_leading = new_builder(None, u"bir", "Det").build()
        first_following = new_builder(None, u"girdi", "Verb").stem(u"gir", "Verb").lexeme(u"gir", "Verb").build()
        second_following = new_builder(None, u".", "Punc").build()

        self._test_generate_likelihood(
            surface=u'erkek',
            leading_context=[[first_leading], [second_leading]],
            following_context=[[first_following], [second_following]])
コード例 #2
0
    def test_generate_likelihood_of_one_word_given_two_context_words(self):
        # Two context words before and two after the surface u'erkek'. The dict
        # passed as calculation_context is pretty-printed after the helper call.
        words_before = [
            [MockMorphemeContainerBuilder.builder(None, u"gençten", "Noun").stem(u"genç", "Noun").lexeme(u"genç", "Adj").build()],
            [MockMorphemeContainerBuilder.builder(None, u"bir", "Det").build()],
        ]
        target_surface = u'erkek'
        words_after = [
            [MockMorphemeContainerBuilder.builder(None, u"girdi", "Verb").stem(u"gir", "Verb").lexeme(u"gir", "Verb").build()],
            [MockMorphemeContainerBuilder.builder(None, u".", "Punc").build()],
        ]

        collected = {}
        self._test_generate_likelihood(
            surface=target_surface,
            leading_context=words_before,
            following_context=words_after,
            calculation_context=collected)

        pprint.pprint(collected)
コード例 #3
0
    def test_generate_likelihood_of_one_word_given_one_following_context_word_sc1(self):
        # Only a following context is supplied; leading_context is passed as None.
        following_word = MockMorphemeContainerBuilder.builder(None, u"girdi", "Noun")
        following_word = following_word.stem(u"gir", "Verb")
        following_word = following_word.lexeme(u"gir", "Verb")
        words_after = [[following_word.build()]]

        self._test_generate_likelihood(
            surface=u'erkek',
            leading_context=None,
            following_context=words_after)
コード例 #4
0
    def test_generate_likelihood_of_one_word_given_one_leading_context_word_sc4(self):
        # Only a leading context is supplied; following_context is passed as None.
        leading_word = MockMorphemeContainerBuilder.builder(None, u"Kerem", "Noun", "Prop").build()
        words_before = [[leading_word]]

        self._test_generate_likelihood(
            surface=u'ter',
            leading_context=words_before,
            following_context=None)
コード例 #5
0
    def test_generate_likelihood_of_one_word_given_one_leading_context_word(self):
        # One leading context word and no following_context argument at all. The
        # dict passed as calculation_context is pretty-printed after the call.
        determiner = MockMorphemeContainerBuilder.builder(None, u"bir", "Det").build()
        collected = {}

        self._test_generate_likelihood(
            surface=u'erkek',
            leading_context=[[determiner]],
            calculation_context=collected)

        pprint.pprint(collected)
コード例 #6
0
    def test_generate_likelihood_of_one_word_given_two_context_words(self):
        # Surface u'erkek' with two context words on either side; the dict passed
        # as calculation_context is dumped with pprint once the helper returns.
        make = MockMorphemeContainerBuilder.builder

        leading_words = [
            make(None, u"gençten", "Noun").stem(u"genç", "Noun").lexeme(u"genç", "Adj").build(),
            make(None, u"bir", "Det").build(),
        ]
        following_words = [
            make(None, u"girdi", "Verb").stem(u"gir", "Verb").lexeme(u"gir", "Verb").build(),
            make(None, u".", "Punc").build(),
        ]

        collected = {}
        self._test_generate_likelihood(
            surface=u'erkek',
            # Wrap each container in its own single-element list.
            leading_context=[[container] for container in leading_words],
            following_context=[[container] for container in following_words],
            calculation_context=collected)

        pprint.pprint(collected)
コード例 #7
0
    def test_generate_likelihood_of_one_word_given_one_leading_context_word(self):
        # A single leading context word; following_context is not passed.
        bir_builder = MockMorphemeContainerBuilder.builder(None, u"bir", "Det")
        words_before = [[bir_builder.build()]]
        target_surface = u'erkek'

        result_sink = {}
        self._test_generate_likelihood(
            surface=target_surface,
            leading_context=words_before,
            calculation_context=result_sink)

        # Show whatever the helper stored in the dict.
        pprint.pprint(result_sink)
コード例 #8
0
    def _create_mock_container(self, word):
        """Build a mock morpheme container from ``word``.

        The surface, stem and lemma-root syntactic categories are each extended
        with ``u'_'`` plus the matching secondary category when that secondary
        category is truthy.

        :param word: object exposing ``str``, ``syntactic_category``,
            ``secondary_syntactic_category``, ``root`` and ``format()``.
        :return: the built container, or ``None`` when ``word`` is an
            ``UnparsableWordBinding`` (a notice is printed in that case).
        """
        if isinstance(word, UnparsableWordBinding):
            # A parenthesised single argument prints identically with the
            # Python 2 statement and the Python 3 function; the bare statement
            # form was a SyntaxError on Python 3.
            print(u'Previous word is unparsable, skipped : {}'.format(word.str))
            return None

        surface_str, surface_syntactic_category = word.str, word.syntactic_category
        stem_str, stem_syntactic_category, stem_secondary_syntactic_category = WordNGramGenerator._get_stem(word)
        lemma_root_str, lemma_root_syntactic_category = word.root.lemma_root, word.root.syntactic_category

        if word.secondary_syntactic_category:
            surface_syntactic_category += u'_' + word.secondary_syntactic_category
        if stem_secondary_syntactic_category:
            stem_syntactic_category += u'_' + stem_secondary_syntactic_category
        if word.root.secondary_syntactic_category:
            lemma_root_syntactic_category += u'_' + word.root.secondary_syntactic_category

        # Same call chain as before, split into steps to keep lines readable.
        container_builder = MockMorphemeContainerBuilder.builder(
            word.format(), surface_str, surface_syntactic_category)
        container_builder = container_builder.stem(stem_str, stem_syntactic_category)
        container_builder = container_builder.lexeme(lemma_root_str, lemma_root_syntactic_category)
        return container_builder.build()
コード例 #9
0
    def _create_mock_container(self, word):
        """Create a mock morpheme container describing ``word``.

        Surface, stem and lemma-root categories are combined with their
        secondary category as ``primary + u'_' + secondary`` whenever the
        secondary category is truthy; otherwise the primary is used unchanged.

        :param word: object exposing ``str``, ``syntactic_category``,
            ``secondary_syntactic_category``, ``root`` and ``format()``.
        :return: the built container, or ``None`` when ``word`` is an
            ``UnparsableWordBinding`` (a notice is printed in that case).
        """

        def with_secondary(primary, secondary):
            # One place for the rule the three categories share.
            if secondary:
                return primary + u'_' + secondary
            return primary

        if isinstance(word, UnparsableWordBinding):
            # print(...) with one argument is valid on Python 2 and Python 3
            # alike; the statement form only parsed on Python 2.
            print(u'Previous word is unparsable, skipped : {}'.format(word.str))
            return None

        surface_str, surface_syntactic_category = word.str, word.syntactic_category
        stem_str, stem_syntactic_category, stem_secondary_syntactic_category = WordNGramGenerator._get_stem(
            word)
        lemma_root_str, lemma_root_syntactic_category = word.root.lemma_root, word.root.syntactic_category

        surface_syntactic_category = with_secondary(
            surface_syntactic_category, word.secondary_syntactic_category)
        stem_syntactic_category = with_secondary(
            stem_syntactic_category, stem_secondary_syntactic_category)
        lemma_root_syntactic_category = with_secondary(
            lemma_root_syntactic_category, word.root.secondary_syntactic_category)

        surface_stage = MockMorphemeContainerBuilder.builder(
            word.format(), surface_str, surface_syntactic_category)
        stem_stage = surface_stage.stem(stem_str, stem_syntactic_category)
        lexeme_stage = stem_stage.lexeme(
            lemma_root_str, lemma_root_syntactic_category)
        return lexeme_stage.build()
コード例 #10
0
    def _build_parse_context_item_from_word(self, parse_context_word):
        """Turn one parse-context word dict into a context item.

        When ``parse_context_word['parsed']`` is falsy, the surface is handed
        to ``self.morphological_parser.parse`` and its result is returned
        as-is. Otherwise a single mock container is built from the surface,
        stem and lemma-root entries of the dict and returned in a
        one-element list.

        :param parse_context_word: mapping with the keys read below;
            ``'parse_result'`` is optional (looked up with ``get``).
        """
        if not parse_context_word['parsed']:
            return self.morphological_parser.parse(
                parse_context_word['surface'])

        # Keys are read in the same order as the original chained expression.
        container_builder = MockMorphemeContainerBuilder(
            parse_context_word.get('parse_result'),
            parse_context_word['surface'],
            parse_context_word['surface_syntactic_category'],
            parse_context_word['surface_secondary_syntactic_category'])
        container_builder = container_builder.stem(
            parse_context_word['stem'],
            parse_context_word['stem_syntactic_category'],
            parse_context_word['stem_secondary_syntactic_category'])
        container_builder = container_builder.lexeme(
            parse_context_word['lemma_root'],
            parse_context_word['lemma_root_syntactic_category'],
            parse_context_word['lemma_root_secondary_syntactic_category'])

        return [container_builder.build()]