Example 1
    def process(self, payload, distribution, topic_id):
        terms = [self.tokens[token_id] for token_id in payload]
        term_entropies = compute_normalised_entropy(
            distribution, base=2)

        # Collapse the per-term distributions into a single distribution
        # over the vocabulary by taking their element-wise product.
        distribution = inference.aggregate_distribution(
            distribution, mode='product', axis=0)

        assert distribution.ndim == 1

        distribution /= distribution.sum()

        # Guard against degenerate distributions (e.g. zero or non-finite
        # probability mass) for which the division above fails to normalize.
        if not np.isclose(distribution.sum(), 1.0):
            logging.error('Encountered non-normalized '
                          'distribution for topic "%s" '
                          '(mass=%.10f).',
                          topic_id, distribution.sum())

        # zip() returns an iterator in Python 3; materialize it so the
        # (term, entropy) pairs, not a zip object, end up in the debug log.
        self.f_debug_out.write('Topic {0} {1}: {2}\n'.format(
            topic_id,
            math_utils.entropy(
                distribution, base=2, normalize=True),
            list(zip(terms, term_entropies))))

        # argsort is ascending; reverse to rank by descending probability.
        ranked_indices = np.argsort(distribution)
        top_ranked_indices = ranked_indices[::-1]

        top_ranked_values = distribution[top_ranked_indices]

        self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
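The `compute_normalised_entropy` helper is not defined in this example. A minimal sketch of a compatible implementation, assuming the input is a (num_terms, K) array of per-term probability distributions and the result is one entropy value per term, normalized into [0, 1] by the maximum attainable entropy log2(K):

    import numpy as np

    def compute_normalised_entropy(distribution, base=2):
        """Per-row entropy, normalized by the maximum entropy log_base(K)."""
        distribution = np.asarray(distribution, dtype=float)
        # Zero-probability entries contribute nothing; substitute 1.0 so
        # that log() stays finite (the product term is zero anyway).
        safe = np.where(distribution > 0.0, distribution, 1.0)
        entropy = -np.sum(distribution * np.log(safe), axis=1) / np.log(base)
        max_entropy = np.log(distribution.shape[1]) / np.log(base)
        return entropy / max_entropy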
Example 2
    def process(self, payload, result, topic_id):
        terms = [self.tokens[token_id] for token_id in payload]

        term_projections = inference.aggregate_distribution(result,
                                                            mode='identity',
                                                            axis=0)

        # Ensure a 2-D (num_terms, representation_size) matrix even when
        # a single projection comes back as a 1-D vector.
        if term_projections.ndim == 1:
            term_projections = term_projections.reshape(1, -1)

        _, entity_representation_size = term_projections.shape
        assert (entity_representation_size ==
                self.model_args.entity_representation_size)

        if self.normalize_representations:
            # Scale each projection to unit L2 norm so that the dot
            # products below become cosine similarities.
            term_projections_l2_norm = \
                np.linalg.norm(term_projections, axis=1)[:, np.newaxis]
            term_projections /= term_projections_l2_norm

        logging.debug('Querying kneighbors for %s.', terms)

        distances, indices = self.query(term_projections)

        assert indices.shape[0] == term_projections.shape[0]

        candidates = collections.defaultdict(float)

        # The aggregation above is expected to yield a single projection;
        # the loop below is written generically but runs exactly once.
        assert indices.shape[0] == 1

        for term in range(indices.shape[0]):
            term_indices = indices[term, :]

            for candidate in term_indices:
                # Dot product between the candidate entity and the term
                # projection; with unit-norm vectors this is the cosine
                # similarity, rescaled from [-1, 1] to [0, 1] below.
                matching_score = np.sum(
                    self.entity_representations[candidate, :] *
                    term_projections[term, :])

                if self.normalize_representations:
                    matching_score = (matching_score + 1.0) / 2.0

                candidates[candidate] += matching_score

        # Sort candidates by accumulated score, descending, and split the
        # (index, score) pairs into two parallel arrays.
        top_ranked_indices, top_ranked_values = \
            map(np.array, zip(
                *sorted(candidates.items(),
                        reverse=True,
                        key=operator.itemgetter(1))))

        self.rank_callback(topic_id, top_ranked_indices, top_ranked_values)
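The `self.query` helper is not shown in this example. A plausible sketch, assuming it wraps a pre-fitted scikit-learn NearestNeighbors index over the entity representation matrix (the names and the choice of library are hypothetical):

    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    # Hypothetical index; the example above only requires that query()
    # return (distances, indices) arrays with one row per projection.
    entity_representations = np.random.rand(1000, 128)
    nn_index = NearestNeighbors(n_neighbors=10).fit(entity_representations)

    def query(term_projections):
        return nn_index.kneighbors(term_projections)

Note that the final ranking does not use the returned distances directly: the neighbor indices only shortlist candidates, which are then re-scored by the dot product against the term projection before sorting.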