コード例 #1
0
    def testNormalizedLogRatioMeasure(self):
        """Check log_ratio_measure() with normalize=True, with and without std."""
        # Reference value: -0.182321557 / -ln(1 / 5) = -0.113282753
        expected = -0.113282753

        scores = direct_confirmation_measure.log_ratio_measure(
            self.segmentation, self.accumulator, normalize=True)
        self.assertAlmostEqual(expected, scores[0])

        # With with_std=True the first entry unpacks into a (mean, std) pair.
        scores_with_std = direct_confirmation_measure.log_ratio_measure(
            self.segmentation, self.accumulator, normalize=True, with_std=True)
        mean, std = scores_with_std[0]
        self.assertAlmostEqual(expected, mean)
        self.assertEqual(0.0, std)
コード例 #2
0
    def testLogRatioMeasure(self):
        """Check log_ratio_measure() without normalization, with and without std."""
        # Reference value: ln{(1 / 5) / [(3 / 5) * (2 / 5)]} = -0.182321557
        expected = -0.182321557

        scores = direct_confirmation_measure.log_ratio_measure(
            self.segmentation, self.accumulator)
        self.assertAlmostEqual(expected, scores[0])

        # With with_std=True the first entry unpacks into a (mean, std) pair.
        scores_with_std = direct_confirmation_measure.log_ratio_measure(
            self.segmentation, self.accumulator, with_std=True)
        mean, std = scores_with_std[0]
        self.assertAlmostEqual(expected, mean)
        self.assertEqual(0.0, std)
コード例 #3
0
    def testLogRatioMeasure(self):
        """Verify the plain (unnormalized) log_ratio_measure() result and its std variant."""
        measure = direct_confirmation_measure.log_ratio_measure
        # Reference value: ln{(1 / 5) / [(3 / 5) * (2 / 5)]} = -0.182321557
        expected = -0.182321557

        first_score = measure(self.segmentation, self.accumulator)[0]
        self.assertAlmostEqual(expected, first_score)

        # Requesting with_std=True makes the first entry a (mean, std) pair.
        first_with_std = measure(
            self.segmentation, self.accumulator, with_std=True)[0]
        mean, std = first_with_std
        self.assertAlmostEqual(expected, mean)
        self.assertEqual(0.0, std)
コード例 #4
0
    def testNormalizedLogRatioMeasure(self):
        """Verify the normalized log_ratio_measure() result and its std variant."""
        measure = direct_confirmation_measure.log_ratio_measure
        # Reference value: -0.182321557 / -ln(1 / 5) = -0.113282753
        expected = -0.113282753

        first_score = measure(
            self.segmentation, self.accumulator, normalize=True)[0]
        self.assertAlmostEqual(expected, first_score)

        # Requesting with_std=True makes the first entry a (mean, std) pair.
        first_with_std = measure(
            self.segmentation, self.accumulator, normalize=True,
            with_std=True)[0]
        mean, std = first_with_std
        self.assertAlmostEqual(expected, mean)
        self.assertEqual(0.0, std)
コード例 #5
0
 def testLogRatioMeasure(self):
     """Check the first log_ratio_measure() score against the reference value."""
     scores = direct_confirmation_measure.log_ratio_measure(
         self.segmentation, self.posting_list, self.num_docs)
     first_score = scores[0]
     # Reference value: ln{(1 / 5) / [(3 / 5) * (2 / 5)]} = -0.182321557
     self.assertAlmostEqual(first_score, -0.182321557)
コード例 #6
0
 def testNormalizedLogRatioMeasure(self):
     """Check the first normalized log_ratio_measure() score against the reference value."""
     scores = direct_confirmation_measure.log_ratio_measure(
         self.segmentation, self.posting_list, self.num_docs, normalize=True)
     first_score = scores[0]
     # Reference value: -0.182321557 / -ln(1 / 5) = -0.113282753
     self.assertAlmostEqual(first_score, -0.113282753)
コード例 #7
0
ファイル: coherence.py プロジェクト: EemeliSaari/DeepTrends
def pmi(topics, corpus, dictionary=None):
    """Score topics with the unnormalized log-ratio confirmation measure.

    The word ids of all topics are pooled into one set, an accumulator is
    built over ``corpus`` for those ids, the topics are segmented with
    ``segmentation.s_one_pre`` and the segments are scored with
    ``direct_confirmation_measure.log_ratio_measure``.

    Parameters
    ----------
    topics : iterable of iterable
        Topics, each an iterable of hashable word ids. It is traversed twice
        (once to collect ids, once for segmentation), so it should be a
        re-iterable container rather than a one-shot generator.
    corpus
        Forwarded unchanged to ``inverted_accumulator``.
    dictionary : optional
        Forwarded unchanged to ``inverted_accumulator`` as ``dictionary``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of whatever ``log_ratio_measure`` returns for the
        segmented topics (presumably one score per topic -- confirm against
        the measure's documentation).

    """
    # from_iterable flattens without unpacking every topic into call arguments.
    unique_ids = set(chain.from_iterable(topics))

    accumulator = inverted_accumulator(
        corpus, unique_ids, dictionary=dictionary)
    segments = segmentation.s_one_pre(topics)
    scores = direct_confirmation_measure.log_ratio_measure(
        segments, accumulator)

    return np.array(scores)
コード例 #8
0
def _pair_npmi(pair, accumulator):
    """Return the normalized pairwise mutual information (**NPMI**) of one word pair.

    The pair is wrapped as a single segment of a single topic and scored by
    ``log_ratio_measure`` with normalization switched on.

    Parameters
    ----------
    pair : (int, int)
        Word ids ``(word_id1, word_id2)`` to score.
    accumulator : :class:`~gensim.topic_coherence.text_analysis.InvertedIndexAccumulator`
        Word occurrence accumulator from probability_estimation.

    Returns
    -------
    float
        NPMI of the two words.

    """
    single_topic_segments = [[pair]]
    scores = log_ratio_measure(single_topic_segments, accumulator, True)
    # Only one topic was passed in, so its score is the first entry.
    return scores[0]
コード例 #9
0
def _pair_npmi(pair, accumulator):
    """Score a single word pair with normalized pairwise mutual information (**NPMI**).

    Parameters
    ----------
    pair : (int, int)
        The two word ids, ``(word_id1, word_id2)``.
    accumulator : :class:`~gensim.topic_coherence.text_analysis.InvertedIndexAccumulator`
        Word occurrence accumulator from probability_estimation.

    Returns
    -------
    float
        NPMI for the given pair.

    """
    # One topic containing one segment; the third argument turns normalization on.
    npmi_scores = log_ratio_measure([[pair]], accumulator, True)
    first_score = npmi_scores[0]
    return first_score
    #Make accumulator
    accumulator = probability_estimation.p_boolean_document(
        corpus, segmented_topics)

    #Perform the measurements and print results

    lcp = direct_confirmation_measure.log_conditional_probability(
        segmented_topics, accumulator)

    with codecs.open(outputfile1, encoding='utf-8', mode='w',
                     errors='ignore') as outputFile:
        for item in lcp:
            outputFile.write('%s \n' % (item))

    pmi = direct_confirmation_measure.log_ratio_measure(
        segmented_topics, accumulator)

    with codecs.open(outputfile2, encoding='utf-8', mode='w',
                     errors='ignore') as outputFile:
        for item in pmi:
            outputFile.write('%s \n' % (item))

    cosim = indirect_confirmation_measure.cosine_similarity(
        segmented_topics, accumulator, unsegmented_topics, 'nlr', 1)

    with codecs.open(outputfile3, encoding='utf-8', mode='w',
                     errors='ignore') as outputFile:
        for item in cosim:
            outputFile.write('%s \n' % (item))

    npmi = direct_confirmation_measure.log_ratio_measure(segmented_topics,
コード例 #11
0
def _pair_npmi(pair, accumulator):
    """Return the normalized pairwise mutual information (NPMI) of a word pair.

    ``pair`` is an iterable of (word_id1, word_id2). It is scored as the only
    segment of a single topic, and that topic's score is returned.
    """
    segmented = [[pair]]
    scores = direct_confirmation_measure.log_ratio_measure(
        segmented, accumulator, True)
    return scores[0]
コード例 #12
0
 def testNormalizedLogRatioMeasure(self):
     """Compare the first normalized log_ratio_measure() score with the reference value."""
     # Reference value: -0.182321557 / -ln(1 / 5) = -0.113282753
     expected = -0.113282753
     scores = direct_confirmation_measure.log_ratio_measure(
         self.segmentation,
         self.posting_list,
         self.num_docs,
         normalize=True,
     )
     self.assertAlmostEqual(scores[0], expected)
コード例 #13
0
 def testLogRatioMeasure(self):
     """Compare the first log_ratio_measure() score with the reference value."""
     # Reference value: ln{(1 / 5) / [(3 / 5) * (2 / 5)]} = -0.182321557
     expected = -0.182321557
     scores = direct_confirmation_measure.log_ratio_measure(
         self.segmentation,
         self.posting_list,
         self.num_docs,
     )
     self.assertAlmostEqual(scores[0], expected)
コード例 #14
0
def _pair_npmi(pair, accumulator):
    """Score one word pair with normalized pairwise mutual information (NPMI).

    ``pair`` is an iterable of (word_id1, word_id2).
    """
    measure = direct_confirmation_measure.log_ratio_measure
    # A single topic holding a single segment; True enables normalization.
    npmi_scores = measure([[pair]], accumulator, True)
    return npmi_scores[0]