def test_topics(self):
    """Check `self.user.topics` against an independently computed tally.

    The expected value is built by summing, per topic, every weight
    yielded by `self._interactions_weights` for each item in
    `self.postinteractions_set`, and then passing each total through
    `utils.atan_norm`.
    """
    # Both topics start at zero so they appear in the expected result
    # even when no interaction contributes to them.
    raw_totals = dict.fromkeys(("Health", "Sports"), 0)
    for interaction in self.postinteractions_set:
        for name, amount in self._interactions_weights(interaction):
            raw_totals[name] += amount

    expected = {}
    for name, total in raw_totals.items():
        expected[name] = utils.atan_norm(total)

    self.assertEqual(self.user.topics, expected)
def test_classify(self, _s3_mock):
    """Check `SimpleWeights.classify` on a sentence about school lunches.

    The weights are loaded from whatever `classifier.s3_key_xreadlines()`
    returns. `_s3_mock` is accepted but unused here.
    NOTE(review): it is presumably injected by a patch decorator outside
    this view -- confirm.
    """
    source_lines = classifier.s3_key_xreadlines()
    simple_weights = classifier.SimpleWeights.load(source_lines)

    sentence = (
        "About 75% of New York City School students qualify "
        "for free or reduced-price lunch."
    )
    actual = simple_weights.classify(sentence)

    # The original annotation attributes the two Education summands to
    # the words "students" and "school".
    expected = {'Healthcare': 0, 'Education': atan_norm(0.6 + 0.9)}
    self.assertEqual(actual, expected)
def classify(self, corpus, *topics):
    """Return a dict mapping each topic to its normalized score for `corpus`.

    Scoring is based on the number of occurrences of words and phrases,
    and their weights, in the SimpleWeights dictionary. Each
    `(topic, score)` pair produced by `self.iter_topics` has its score
    passed through `atan_norm` before being stored.

    When no `topics` are given, `corpus` is classified for every topic
    that has weights. To restrict the result, name the topics as extra
    positional arguments:

        SIMPLE_WEIGHTS.classify(corpus, 'healthcare', 'cooking', ...)

    """
    normalized = {}
    for name, raw_score in self.iter_topics(corpus, *topics):
        normalized[name] = atan_norm(raw_score)
    return normalized
def get_topics(post_interactions, post_topics):
    """Score a User's interests by topic.

    `post_interactions` is an iterable of the user's PostInteractions and
    `post_topics` is a catalog of PostTopics, looked up by each
    interaction's `postid`.

    For every interaction whose post is in the catalog, each topic value
    of that post is multiplied by each count in the interaction's
    `document` and added to that topic's running total. The totals are
    then passed through `atan_norm`. Interactions whose post is missing
    from the catalog (lookup raises KeyError) contribute nothing.
    """
    totals = collections.defaultdict(int)

    for entry in post_interactions:
        try:
            record = post_topics[entry.postid]
        except KeyError:
            # Post is not catalogued, so there are no topics to credit.
            continue

        for name, weight in record.document.items():
            # For now every interaction type carries the same weight, so
            # only the count matters and the type key is ignored.
            for _kind, occurrences in entry.document.items():
                totals[name] += weight * occurrences

    # Normalize the accumulated topic scores.
    normalized = {}
    for name, total in totals.items():
        normalized[name] = atan_norm(total)
    return normalized