def testNormalizedLogRatioMeasure(self):
    """Check log_ratio_measure() with normalize=True, with and without std."""
    # Expected value: -0.182321557 / -ln(1 / 5) = -0.113282753
    expected = -0.113282753
    measure = direct_confirmation_measure.log_ratio_measure

    # Plain call: the first entry of the result is the score itself.
    plain_scores = measure(self.segmentation, self.accumulator, normalize=True)
    obtained = plain_scores[0]
    self.assertAlmostEqual(expected, obtained)

    # With with_std=True the first entry unpacks into a (mean, std) pair.
    scores_with_std = measure(
        self.segmentation, self.accumulator, normalize=True, with_std=True)
    mean, std = scores_with_std[0]
    self.assertAlmostEqual(expected, mean)
    self.assertEqual(0.0, std)
def testLogRatioMeasure(self):
    """Check log_ratio_measure() without normalization, with and without std."""
    # Expected value: ln{(1 / 5) / [(3 / 5) * (2 / 5)]} = -0.182321557
    expected = -0.182321557
    measure = direct_confirmation_measure.log_ratio_measure

    # Plain call: the first entry of the result is the score itself.
    plain_scores = measure(self.segmentation, self.accumulator)
    obtained = plain_scores[0]
    self.assertAlmostEqual(expected, obtained)

    # With with_std=True the first entry unpacks into a (mean, std) pair.
    scores_with_std = measure(
        self.segmentation, self.accumulator, with_std=True)
    mean, std = scores_with_std[0]
    self.assertAlmostEqual(expected, mean)
    self.assertEqual(0.0, std)
def testLogRatioMeasure(self):
    """Verify log_ratio_measure() on the posting-list fixture."""
    # Expected value: ln{(1 / 5) / [(3 / 5) * (2 / 5)]} = -0.182321557
    expected = -0.182321557
    scores = direct_confirmation_measure.log_ratio_measure(
        self.segmentation, self.posting_list, self.num_docs)
    obtained = scores[0]
    self.assertAlmostEqual(obtained, expected)
def testNormalizedLogRatioMeasure(self):
    """Verify log_ratio_measure() with normalize=True on the posting-list fixture."""
    # Expected value: -0.182321557 / -ln(1 / 5) = -0.113282753
    expected = -0.113282753
    scores = direct_confirmation_measure.log_ratio_measure(
        self.segmentation, self.posting_list, self.num_docs, normalize=True)
    obtained = scores[0]
    self.assertAlmostEqual(obtained, expected)
def pmi(topics, corpus, dictionary=None):
    """Score topics with the (unnormalized) log-ratio confirmation measure.

    The word ids appearing in ``topics`` are collected, an accumulator is
    built for them over ``corpus`` with ``inverted_accumulator``, the topics
    are segmented with ``segmentation.s_one_pre`` and the resulting segments
    are scored with ``direct_confirmation_measure.log_ratio_measure``.

    Parameters
    ----------
    topics : iterable of iterable
        Topics, each an iterable of hashable word ids (presumably matching
        the ids used in ``corpus`` -- confirm against callers). It is
        iterated twice (once to collect ids, once for segmentation), so it
        must not be a one-shot iterator.
    corpus
        Passed through unchanged to ``inverted_accumulator``.
    dictionary : optional
        Forwarded to ``inverted_accumulator`` as its ``dictionary`` keyword.

    Returns
    -------
    numpy.ndarray
        The scores returned by ``log_ratio_measure``, wrapped in an array.
    """
    # Flatten all topics into the set of distinct ids the accumulator tracks.
    unique_ids = set(chain.from_iterable(topics))
    accumulator = inverted_accumulator(corpus, unique_ids, dictionary=dictionary)
    segments = segmentation.s_one_pre(topics)
    scores = direct_confirmation_measure.log_ratio_measure(segments, accumulator)
    return np.array(scores)
def _pair_npmi(pair, accumulator):
    """Return the normalized pairwise mutual information (**NPMI**) of a word pair.

    Parameters
    ----------
    pair : (int, int)
        The two word ids, ``(word_id1, word_id2)``.
    accumulator : :class:`~gensim.topic_coherence.text_analysis.InvertedIndexAccumulator`
        Word occurrence accumulator from probability_estimation.

    Returns
    -------
    float
        NPMI between the two words.

    """
    # The pair is wrapped as one topic containing one segment; the first
    # entry of the result is returned.
    single_pair_topics = [[pair]]
    # Third positional argument is presumably the ``normalize`` flag -- confirm
    # against the log_ratio_measure signature.
    scores = log_ratio_measure(single_pair_topics, accumulator, True)
    return scores[0]
#Make accumulator accumulator = probability_estimation.p_boolean_document( corpus, segmented_topics) #Perform the measurements and print results lcp = direct_confirmation_measure.log_conditional_probability( segmented_topics, accumulator) with codecs.open(outputfile1, encoding='utf-8', mode='w', errors='ignore') as outputFile: for item in lcp: outputFile.write('%s \n' % (item)) pmi = direct_confirmation_measure.log_ratio_measure( segmented_topics, accumulator) with codecs.open(outputfile2, encoding='utf-8', mode='w', errors='ignore') as outputFile: for item in pmi: outputFile.write('%s \n' % (item)) cosim = indirect_confirmation_measure.cosine_similarity( segmented_topics, accumulator, unsegmented_topics, 'nlr', 1) with codecs.open(outputfile3, encoding='utf-8', mode='w', errors='ignore') as outputFile: for item in cosim: outputFile.write('%s \n' % (item)) npmi = direct_confirmation_measure.log_ratio_measure(segmented_topics,
def _pair_npmi(pair, accumulator):
    """Return the normalized pairwise mutual information (NPMI) of a word pair.

    ``pair`` is an iterable of (word_id1, word_id2).
    """
    # The pair is wrapped as one topic containing one segment; the first
    # entry of the result is returned.
    single_pair_topics = [[pair]]
    # Third positional argument is presumably the ``normalize`` flag -- confirm
    # against the log_ratio_measure signature.
    scores = direct_confirmation_measure.log_ratio_measure(
        single_pair_topics, accumulator, True)
    return scores[0]
def testNormalizedLogRatioMeasure(self):
    """Normalized log-ratio score of the fixture must match the hand-computed value."""
    measure = direct_confirmation_measure.log_ratio_measure
    all_scores = measure(
        self.segmentation, self.posting_list, self.num_docs, normalize=True)
    obtained = all_scores[0]
    # Hand computation: -0.182321557 / -ln(1 / 5) = -0.113282753
    expected = -0.113282753
    self.assertAlmostEqual(obtained, expected)
def testLogRatioMeasure(self):
    """Log-ratio score of the fixture must match the hand-computed value."""
    measure = direct_confirmation_measure.log_ratio_measure
    all_scores = measure(self.segmentation, self.posting_list, self.num_docs)
    obtained = all_scores[0]
    # Hand computation: ln{(1 / 5) / [(3 / 5) * (2 / 5)]} = -0.182321557
    expected = -0.182321557
    self.assertAlmostEqual(obtained, expected)