Python Phrases.find_phrasesの例

プログラミング言語: Python

名前空間/パッケージ名: gensim.models.phrases

クラス/型: Phrases

メソッド/関数: find_phrases

hotexamples.comのコード掲載数: 10

Python Phrases.find_phrases - 10件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのgensim.models.phrases.Phrases.find_phrasesの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

load(30)

Phrases(30)

save(30)

export_phrases(27)

find_phrases(10)

add_vocab(9)

learn_vocab(3)

phrasegrams(2)

freeze(1)

score_item(1)

scoring(1)

vocab(1)

コード例 #1

ファイルを表示

    def testScoringDefault(self):
        """Check default scoring (from the Mikolov word2vec paper).

        The expected scores are recomputed from the model's own vocabulary
        counts as ``(pair - min_count) / first / second * len(vocab)`` and
        compared, rounded to three decimals, with what ``find_phrases``
        reports for a sentence containing connector words.
        """
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=self.connector_words,
        )
        tokens = ['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']
        found = model.find_phrases([tokens])
        observed = {round(value, 3) for value in found.values()}

        floor = float(model.min_count)
        vocab_size = float(len(model.vocab))

        def count(key):
            # Raw occurrence count of a token or joined phrase, as a float.
            return float(model.vocab[key])

        def expected(pair, first, second):
            # Same operand order as the reference formula, rounded like the
            # observed scores.
            raw = (count(pair) - floor) / count(first) / count(second) * vocab_size
            return round(raw, 3)

        assert observed == {
            # score for data and graph
            expected("data_and_graph", "data", "graph"),
            # score for human interface
            expected("human_interface", "human", "interface"),
        }

コード例 #2

ファイルを表示

    def testMultipleBigramsSingleEntry(self):
        """A single sentence should be able to yield more than one bigram."""
        model = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
        tokens = ['graph', 'minors', 'survey', 'human', 'interface']
        found = model.find_phrases([tokens])

        # Only the phrase strings (the mapping's keys) matter here.
        expected = {'graph minors', 'human interface'}
        assert set(found.keys()) == expected

コード例 #3

ファイルを表示

    def testExportPhrases(self):
        """Check the bigrams that ``find_phrases`` reports for the training sentences."""
        model = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
        found = model.find_phrases(self.sentences)

        expected = {'response time', 'graph minors', 'human interface'}
        assert set(found.keys()) == expected

コード例 #4

ファイルを表示

    def testExportPhrases(self):
        """Check the phrases reported for the training sentences with connector words."""
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=self.connector_words,
            delimiter=' ',
        )
        found = model.find_phrases(self.sentences)

        expected = {
            'human interface',
            'graph of trees',
            'data and graph',
            'lack of interest',
        }
        assert set(found.keys()) == expected

コード例 #5

ファイルを表示

    def testCustomScorer(self):
        """Check that a user-supplied scoring callable is used for every bigram."""
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=.001,
            scoring=dumb_scorer,
        )
        tokens = ['graph', 'minors', 'survey', 'human', 'interface', 'system']
        scores = list(model.find_phrases([tokens]).values())

        assert all(value == 1 for value in scores)
        # 'graph minors' and 'survey human' and 'interface system'
        assert len(scores) == 3

コード例 #6

ファイルを表示

    def testScoringNpmi(self):
        """Check normalized pointwise mutual information (NPMI) scores."""
        model = Phrases(self.sentences, min_count=1, threshold=.5, scoring='npmi')
        tokens = ['graph', 'minors', 'survey', 'human', 'interface']
        found = model.find_phrases([tokens])

        # Round to three decimals so the floats can be compared exactly.
        observed = {round(value, 3) for value in found.values()}
        expected = {
            .882,  # score for graph minors
            .714,  # score for human interface
        }
        assert observed == expected

コード例 #7

ファイルを表示

    def testScoringDefault(self):
        """Check the default scoring (from the Mikolov word2vec paper)."""
        model = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
        tokens = ['graph', 'minors', 'survey', 'human', 'interface']
        found = model.find_phrases([tokens])

        # Round to three decimals so the floats can be compared exactly.
        observed = {round(value, 3) for value in found.values()}
        expected = {
            5.167,  # score for graph minors
            3.444,  # score for human interface
        }
        assert observed == expected

コード例 #8

ファイルを表示

    def testCustomScorer(self):
        """Test using a custom scoring function together with connector words.

        Builds a model that scores candidates with ``dumb_scorer`` and checks
        that every phrase found in the sample sentence carries a score of 1
        and that exactly two phrases are reported.
        """
        bigram = Phrases(
            self.sentences,
            min_count=1,
            threshold=.001,
            scoring=dumb_scorer,
            connector_words=self.connector_words,
        )
        test_sentences = [[
            'data', 'and', 'graph', 'survey', 'for', 'human', 'interface'
        ]]
        seen_scores = list(bigram.find_phrases(test_sentences).values())

        # Compare with 1 explicitly: a bare all(seen_scores) only checks
        # truthiness, so any non-zero score would pass unnoticed.
        assert all(score == 1 for score in seen_scores)
        assert len(seen_scores) == 2  # 'data and graph' 'survey for human'

コード例 #9

ファイルを表示

    def testMultipleBigramsSingleEntry(self):
        """A single sentence should yield several phrases when connector words are set."""
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=self.connector_words,
            delimiter=' ',
        )
        tokens = ['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']
        found = model.find_phrases([tokens])

        expected = {'data and graph', 'human interface'}
        assert set(found.keys()) == expected

コード例 #10

ファイルを表示

    def testScoringNpmi(self):
        """Check normalized pointwise mutual information (NPMI) scores with connector words."""
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=.5,
            scoring='npmi',
            connector_words=self.connector_words,
        )
        tokens = ['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']
        found = model.find_phrases([tokens])

        # Round to three decimals so the floats can be compared exactly.
        observed = {round(value, 3) for value in found.values()}
        expected = {
            .74,  # score for data and graph
            .894,  # score for human interface
        }
        assert observed == expected