def process(self, payload, distribution, topic_id):
    """Aggregate per-term topic distributions, log diagnostics, rank terms.

    Args:
        payload: iterable of token ids, resolved to terms via ``self.tokens``.
        distribution: 2-D array of per-term distributions (one row per term);
            aggregated to a single 1-D distribution via a product over axis 0.
        topic_id: identifier used in log/debug output and the rank callback.

    Side effects:
        Writes a diagnostic line to ``self.f_debug_out`` and invokes
        ``self.rank_callback(topic_id, indices, values)`` with term indices
        sorted by descending probability mass.
    """
    # Comprehension instead of map(lambda id: ...): avoids shadowing the
    # builtin `id` and is the idiomatic form.
    terms = [self.tokens[token_id] for token_id in payload]

    term_entropies = compute_normalised_entropy(distribution, base=2)

    distribution = inference.aggregate_distribution(
        distribution, mode='product', axis=0)
    assert distribution.ndim == 1

    # Re-normalize: the product aggregation does not preserve unit mass.
    distribution /= distribution.sum()

    if not np.isclose(distribution.sum(), 1.0):
        logging.error('Encountered non-normalized '
                      'distribution for topic "%s" '
                      '(mass=%.10f).',
                      topic_id, distribution.sum())

    # BUG FIX: zip() is a lazy iterator in Python 3 — without list() the
    # debug file would contain "<zip object at 0x...>" instead of the pairs.
    self.f_debug_out.write('Topic {0} {1}: {2}\n'.format(
        topic_id,
        math_utils.entropy(distribution, base=2, normalize=True),
        list(zip(terms, term_entropies))))

    # argsort is ascending; reverse for highest-probability-first ranking.
    ranked_indices = np.argsort(distribution)
    top_ranked_indices = ranked_indices[::-1]
    top_ranked_values = distribution[top_ranked_indices]

    self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
def process(self, payload, distribution, topic_id):
    """Collapse a stack of per-term distributions into one topic ranking.

    Args:
        payload: iterable of token ids; each is looked up in ``self.tokens``.
        distribution: 2-D array (terms x vocabulary) of per-term
            distributions, combined by element-wise product along axis 0.
        topic_id: label used in error logging, debug output and the callback.

    Side effects:
        Appends one diagnostic line to ``self.f_debug_out`` and calls
        ``self.rank_callback`` with indices/values ordered by decreasing mass.
    """
    # List comprehension rather than map(lambda id: ...) — clearer, and it
    # no longer shadows the `id` builtin.
    terms = [self.tokens[token_id] for token_id in payload]

    term_entropies = compute_normalised_entropy(distribution, base=2)

    distribution = inference.aggregate_distribution(
        distribution, mode='product', axis=0)
    assert distribution.ndim == 1

    # Product aggregation leaves an unnormalized vector; renormalize here.
    distribution /= distribution.sum()

    if not np.isclose(distribution.sum(), 1.0):
        logging.error(
            'Encountered non-normalized '
            'distribution for topic "%s" '
            '(mass=%.10f).',
            topic_id, distribution.sum())

    # BUG FIX: materialize zip() — in Python 3 it is a one-shot iterator and
    # formatting it directly would write "<zip object at 0x...>" to the file.
    self.f_debug_out.write('Topic {0} {1}: {2}\n'.format(
        topic_id,
        math_utils.entropy(distribution, base=2, normalize=True),
        list(zip(terms, term_entropies))))

    # Descending order: reverse the ascending argsort result.
    ranked_indices = np.argsort(distribution)
    top_ranked_indices = ranked_indices[::-1]
    top_ranked_values = distribution[top_ranked_indices]

    self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
def compute_normalised_entropy(distribution, base=2):
    """Return the normalized entropy of each row of *distribution*.

    Expects a 2-D array whose rows are probability distributions (each
    summing to one); entropies are computed in log base *base* with
    normalization enabled.
    """
    assert distribution.ndim == 2
    assert np.allclose(distribution.sum(axis=1), 1.0)
    # Iterating a 2-D array yields its rows, so no index bookkeeping needed.
    return [math_utils.entropy(row, base=base, normalize=True)
            for row in distribution]
def compute_normalised_entropy(distribution, base=2):
    """Compute a per-row normalized entropy for a 2-D distribution matrix.

    Each row must be a probability distribution summing to one; the result
    is a list with one normalized entropy value (log base *base*) per row.
    """
    assert distribution.ndim == 2
    assert np.allclose(distribution.sum(axis=1), 1.0)
    row_count = distribution.shape[0]
    entropies = []
    for row_idx in range(row_count):
        entropies.append(
            math_utils.entropy(
                distribution[row_idx, :], base=base, normalize=True))
    return entropies