Esempi in Python per Phrases.find_phrases

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: gensim.models.phrases

Classe/tipologia: Phrases

Metodo/funzione: find_phrases

Esempi su hotexamples.com: 10

Phrases.find_phrases in Python: 10 esempi trovati. Questi sono i migliori esempi reali in Python per gensim.models.phrases.Phrases.find_phrases, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

load(30)

Phrases(30)

save(30)

export_phrases(27)

find_phrases(10)

add_vocab(9)

learn_vocab(3)

phrasegrams(2)

freeze(1)

score_item(1)

scoring(1)

vocab(1)

Esempio n. 1

Mostra file

    def testScoringDefault(self):
        """Check the default scorer (from the Mikolov word2vec paper) with connector words.

        The scores reported by ``find_phrases`` are rounded to three decimals and
        compared with values recomputed here from the model's own vocab counts.
        """
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=self.connector_words,
        )
        probe = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
        found = model.find_phrases(probe)
        seen_scores = {round(value, 3) for value in found.values()}

        floor = float(model.min_count)
        vocab_size = float(len(model.vocab))

        def expected_score(first, second, joined):
            # (count(joined) - min_count) / count(first) / count(second) * len(vocab),
            # evaluated left to right so the float result is reproduced exactly.
            pair_count = float(model.vocab[joined])
            first_count = float(model.vocab[first])
            second_count = float(model.vocab[second])
            return round(
                (pair_count - floor) / first_count / second_count * vocab_size, 3)

        assert seen_scores == {
            # score for data and graph
            expected_score("data", "graph", "data_and_graph"),
            # score for human interface
            expected_score("human", "interface", "human_interface"),
        }

Esempio n. 2

Mostra file

    def testMultipleBigramsSingleEntry(self):
        """Verify that one input sentence can yield more than one bigram."""
        model = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
        probe = [['graph', 'minors', 'survey', 'human', 'interface']]
        found = model.find_phrases(probe)

        # Only the phrase strings (the mapping's keys) matter here, not the scores.
        expected = {'graph minors', 'human interface'}
        assert set(found.keys()) == expected

Esempio n. 3

Mostra file

    def testExportPhrases(self):
        """Check the bigrams ``find_phrases`` reports for the model's own input sentences."""
        model = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
        found = model.find_phrases(self.sentences)

        # Phrases are joined with the ' ' delimiter passed to the constructor.
        expected = {'response time', 'graph minors', 'human interface'}
        assert set(found.keys()) == expected

Esempio n. 4

Mostra file

    def testExportPhrases(self):
        """Check the phrases ``find_phrases`` reports when connector words are configured."""
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=self.connector_words,
            delimiter=' ',
        )
        found = model.find_phrases(self.sentences)

        # Expected phrases include ones that span a connector word ('of', 'and').
        expected = {
            'human interface',
            'graph of trees',
            'data and graph',
            'lack of interest',
        }
        assert set(found.keys()) == expected

Esempio n. 5

Mostra file

    def testCustomScorer(self):
        """Exercise ``Phrases`` with a user-supplied scoring callable."""
        model = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
        probe = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        scores = list(model.find_phrases(probe).values())

        # Every reported phrase must carry a score of exactly 1.
        assert all(value == 1 for value in scores)
        # 'graph minors' and 'survey human' and 'interface system'
        expected_phrase_count = 3
        assert len(scores) == expected_phrase_count

Esempio n. 6

Mostra file

    def testScoringNpmi(self):
        """Check scores produced with ``scoring='npmi'`` (normalized pointwise mutual information)."""
        model = Phrases(self.sentences, min_count=1, threshold=.5, scoring='npmi')
        probe = [['graph', 'minors', 'survey', 'human', 'interface']]
        found = model.find_phrases(probe)

        # Compare at three-decimal precision.
        rounded = {round(value, 3) for value in found.values()}
        expected = {
            .882,  # score for graph minors
            .714,  # score for human interface
        }
        assert rounded == expected

Esempio n. 7

Mostra file

    def testScoringDefault(self):
        """Check the default scorer (from the Mikolov word2vec paper) against fixed values."""
        model = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
        probe = [['graph', 'minors', 'survey', 'human', 'interface']]
        found = model.find_phrases(probe)

        # Compare at three-decimal precision.
        rounded = {round(value, 3) for value in found.values()}
        expected = {
            5.167,  # score for graph minors
            3.444,  # score for human interface
        }
        assert rounded == expected

Esempio n. 8

Mostra file

    def testCustomScorer(self):
        """Test using a custom scoring function together with connector words.

        Builds a ``Phrases`` model with ``dumb_scorer`` as the scoring callable
        and checks that every phrase found in the probe sentence has score 1
        and that exactly two phrases are reported.
        """
        bigram = Phrases(
            self.sentences,
            min_count=1,
            threshold=.001,
            scoring=dumb_scorer,
            connector_words=self.connector_words,
        )
        test_sentences = [[
            'data', 'and', 'graph', 'survey', 'for', 'human', 'interface'
        ]]
        seen_scores = list(bigram.find_phrases(test_sentences).values())

        # Compare against 1 explicitly: a bare all(seen_scores) only checks
        # truthiness and would accept any non-zero score.
        assert all(score == 1 for score in seen_scores)  # all scores 1
        assert len(seen_scores) == 2  # 'data and graph' 'survey for human'

Esempio n. 9

Mostra file

    def testMultipleBigramsSingleEntry(self):
        """Verify that one sentence yields several phrases when connector words are set."""
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=self.connector_words,
            delimiter=' ',
        )
        probe = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
        found = model.find_phrases(probe)

        # Only the phrase strings (the mapping's keys) matter here, not the scores.
        expected = {'data and graph', 'human interface'}
        assert set(found.keys()) == expected

Esempio n. 10

Mostra file

    def testScoringNpmi(self):
        """Check ``scoring='npmi'`` (normalized pointwise mutual information) with connector words."""
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=.5,
            scoring='npmi',
            connector_words=self.connector_words,
        )
        probe = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
        found = model.find_phrases(probe)

        # Compare at three-decimal precision.
        rounded = {round(value, 3) for value in found.values()}
        expected = {
            .74,  # score for data and graph
            .894,  # score for human interface
        }
        assert rounded == expected