def process(self, payload, distribution, topic_id):
    """Aggregate per-term topic distributions and emit a ranking.

    Collapses the per-term distributions into a single distribution via an
    element-wise product, renormalizes it, writes diagnostics to the debug
    stream, and hands the indices/values sorted by descending probability
    to ``self.rank_callback``.

    Args:
        payload: iterable of token ids, resolved through ``self.tokens``.
        distribution: array of per-term distributions; aggregated over
            axis 0 (assumed shape: terms on axis 0 — TODO confirm).
        topic_id: identifier used for logging and the rank callback.
    """
    # Avoid shadowing the `id` builtin (original used `lambda id: ...`).
    terms = [self.tokens[token_id] for token_id in payload]

    term_entropies = compute_normalised_entropy(distribution, base=2)

    distribution = inference.aggregate_distribution(
        distribution, mode='product', axis=0)
    assert distribution.ndim == 1

    distribution /= distribution.sum()

    # Guard against numerical drift after renormalization.
    if not np.isclose(distribution.sum(), 1.0):
        logging.error('Encountered non-normalized '
                      'distribution for topic "%s" '
                      '(mass=%.10f).',
                      topic_id, distribution.sum())

    # BUG FIX: `zip()` is lazy in Python 3, so formatting it printed
    # "<zip object at 0x...>" instead of the (term, entropy) pairs;
    # materialize it so the debug line is actually informative.
    self.f_debug_out.write('Topic {0} {1}: {2}\n'.format(
        topic_id,
        math_utils.entropy(distribution, base=2, normalize=True),
        list(zip(terms, term_entropies))))

    # argsort is ascending; reverse for highest-probability-first.
    top_ranked_indices = np.argsort(distribution)[::-1]
    top_ranked_values = distribution[top_ranked_indices]

    self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
def process(self, payload, distribution, topic_id):
    """Reduce per-term distributions to one topic distribution and rank it.

    The per-term distributions are combined with a product aggregation,
    renormalized to unit mass, logged for debugging, and finally reported
    to ``self.rank_callback`` in descending order of probability.

    Args:
        payload: iterable of token ids mapped to terms via ``self.tokens``.
        distribution: array of per-term distributions, aggregated along
            axis 0 (presumably terms on axis 0 — verify against caller).
        topic_id: identifier forwarded to logging and the callback.
    """
    # Use a comprehension instead of `lambda id: ...`, which shadowed
    # the `id` builtin.
    terms = [self.tokens[token_id] for token_id in payload]

    term_entropies = compute_normalised_entropy(distribution, base=2)

    distribution = inference.aggregate_distribution(
        distribution, mode='product', axis=0)
    assert distribution.ndim == 1

    distribution /= distribution.sum()

    # Renormalization should yield unit mass; warn loudly if it did not.
    if not np.isclose(distribution.sum(), 1.0):
        logging.error(
            'Encountered non-normalized '
            'distribution for topic "%s" '
            '(mass=%.10f).',
            topic_id, distribution.sum())

    # BUG FIX: a bare `zip` object formats as "<zip object at 0x...>" in
    # Python 3 — materialize the (term, entropy) pairs before formatting.
    self.f_debug_out.write('Topic {0} {1}: {2}\n'.format(
        topic_id,
        math_utils.entropy(distribution, base=2, normalize=True),
        list(zip(terms, term_entropies))))

    # Descending order: argsort ascending, then reverse.
    top_ranked_indices = np.argsort(distribution)[::-1]
    top_ranked_values = distribution[top_ranked_indices]

    self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
def process(self, payload, result, topic_id):
    """Rank candidate entities for a topic via nearest-neighbour matching.

    Projects the query terms into entity space, retrieves candidate
    entities with ``self.query`` (a kneighbors-style lookup), scores each
    candidate by the inner product with the projection, and reports the
    candidates to ``self.rank_callback`` sorted by descending score.

    Args:
        payload: iterable of token ids, resolved through ``self.tokens``
            (used for logging only).
        result: model output aggregated with mode='identity' into one or
            more projection row vectors.
        topic_id: identifier forwarded to the rank callback.
    """
    # Comprehension instead of `lambda id: ...` (shadowed the builtin).
    terms = [self.tokens[token_id] for token_id in payload]

    term_projections = inference.aggregate_distribution(
        result, mode='identity', axis=0)
    # Promote a single projection vector to a 1-row matrix.
    if term_projections.ndim == 1:
        term_projections = term_projections.reshape(1, -1)

    _, entity_representation_size = term_projections.shape
    assert (entity_representation_size ==
            self.model_args.entity_representation_size)

    if self.normalize_representations:
        # L2-normalize each projection row so scores are cosine-like.
        term_projections_l2_norm = \
            np.linalg.norm(term_projections, axis=1)[:, np.newaxis]
        term_projections /= term_projections_l2_norm

    logging.debug('Querying kneighbors for %s.', terms)
    distances, indices = self.query(term_projections)
    assert indices.shape[0] == term_projections.shape[0]

    candidates = collections.defaultdict(float)

    # NOTE(review): the pipeline currently assumes a single aggregated
    # projection row; the loop below is written generally regardless.
    assert indices.shape[0] == 1

    for term in range(indices.shape[0]):
        # Dropped the unused `rank` from the original `enumerate` —
        # only the candidate ids are consumed.
        for candidate in indices[term, :]:
            # Inner product between the candidate entity representation
            # and the term projection.
            matching_score = np.sum(
                self.entity_representations[candidate, :] *
                term_projections[term, :])

            if self.normalize_representations:
                # Map cosine similarity from [-1, 1] into [0, 1].
                matching_score = (matching_score + 1.0) / 2.0

            candidates[candidate] += matching_score

    # Sort candidates by score, best first; unzip into parallel arrays.
    top_ranked_indices, top_ranked_values = \
        map(np.array, zip(
            *sorted(candidates.items(),
                    reverse=True,
                    key=operator.itemgetter(1))))

    self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
def process(self, payload, result, topic_id):
    """Score nearest-neighbour entities against the aggregated term
    projections and report them, best match first, to the rank callback.

    Args:
        payload: iterable of token ids looked up in ``self.tokens``
            (only used for the debug log line).
        result: model output; aggregated with mode='identity' into
            projection row vectors.
        topic_id: identifier passed through to ``self.rank_callback``.
    """
    terms = [self.tokens[token_id] for token_id in payload]

    projections = inference.aggregate_distribution(
        result, mode='identity', axis=0)
    if projections.ndim == 1:
        # A lone vector becomes a single-row matrix.
        projections = projections.reshape(1, -1)

    _, representation_size = projections.shape
    assert (representation_size ==
            self.model_args.entity_representation_size)

    if self.normalize_representations:
        # Scale every row to unit L2 norm.
        row_norms = np.linalg.norm(projections, axis=1)[:, np.newaxis]
        projections /= row_norms

    logging.debug('Querying kneighbors for %s.', terms)
    distances, indices = self.query(projections)
    assert indices.shape[0] == projections.shape[0]

    scores = collections.defaultdict(float)
    assert indices.shape[0] == 1

    for row_idx in range(indices.shape[0]):
        projection = projections[row_idx, :]
        for neighbour in indices[row_idx, :]:
            # Inner product of candidate entity and projection row.
            score = np.sum(
                self.entity_representations[neighbour, :] * projection)
            if self.normalize_representations:
                # Shift cosine similarity into the [0, 1] range.
                score = (score + 1.0) / 2.0
            scores[neighbour] += score

    # Highest-scoring candidates first.
    ordered = sorted(scores.items(),
                     reverse=True,
                     key=operator.itemgetter(1))
    top_ranked_indices = np.array([entity for entity, _ in ordered])
    top_ranked_values = np.array([value for _, value in ordered])

    self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)