Example #1
0
	def get_topic_similarity(self, user_data, num_topics=100):
		""" Returns the cosine similarity between the topic scores of two users, as a whole percentage

		user_data  -- sequence of per-user data; every element is handed to utils.get_tweets.
		              The returned score compares the first user with the second one, so at
		              least two users are expected.
		num_topics -- length of the dense score vector built for each user (default 100).
		              The first item of every entry returned by self.get_topics is used as an
		              index into that vector, so it must be smaller than num_topics.

		Returns an int: the similarity rounded to three decimals, times 100, truncated.
		"""
		from decimal import Decimal

		#=====[ Extracts tweet text from user data  ]=====
		tweets = [utils.get_tweets(data) for data in user_data]
		tweets = [self.format_tweets(tweet_set) for tweet_set in tweets]

		#=====[ Gets topics for list of tweets  ]=====
		all_tweet_topics = [self.get_topics(tweet_set) for tweet_set in tweets]

		scores = []

		#=====[ Rehydrates a dense vector per user in order to take cosine similarity  ]=====
		for topics in all_tweet_topics:
			topic_scores = [0] * num_topics
			for entry in topics:
				topic_scores[entry[0]] = entry[1]
			scores.append(topic_scores)

		#=====[ Row 0 holds the first user against everyone; column 1 is the second user  ]=====
		similarity = cosine_similarity(scores[0:1], scores)[0][1]

		#=====[ Scales in decimal arithmetic: a float multiply turns 0.57 into 56.99..., which truncates to 56  ]=====
		return int(Decimal("%.3f" % similarity) * 100)
Example #2
0
	def distill_top_topics(self, user_data, topn):
		""" Returns the names of a user's highest scoring topics, best first

		user_data -- data for a single user; handed to utils.get_tweets
		topn      -- maximum number of topic names to return; None returns every qualifying topic

		Only topics scoring above 0.15 are considered. Topic ids are translated through
		self.topic_names and a name is listed once even if several ids map to it.
		"""

		#=====[ Extracts tweet text from user data  ]=====
		tweets = utils.get_tweets(user_data)
		tweets = self.format_tweets(tweets)

		#=====[ Gets topics for list of tweets  ]=====
		all_tweet_topics = self.get_topics(tweets)
		topic_scores = {}

		#=====[ Keeps the score of each topic above the threshold (a repeated id keeps its last score)  ]=====
		for topic in all_tweet_topics:
			if topic[1] > 0.15:
				topic_scores[topic[0]] = topic[1]

		#=====[ Sorts topics from highest to lowest score and returns at most #topn distinct names  ]=====
		sorted_topics = sorted(topic_scores.items(), key=operator.itemgetter(1), reverse=True)
		top_topics = []
		for topic in sorted_topics:
			# The cap is checked after de-duplication so duplicates never use up a slot
			if topn is not None and len(top_topics) >= topn:
				break
			topic_name = self.topic_names[topic[0]]
			if topic_name not in top_topics:
				top_topics.append(topic_name)

		return top_topics