def __init__(self):
    # NOTE(review): this looks like a stray duplicate of
    # ReputationMonitor.__init__ defined below — confirm and remove if so.
    # Fixed-size KMeans clusterer; cluster_count=8 is hard-coded.
    self.clusterer = KMeansClusterer(cluster_count=8)
    # Only consider tweets from the last 24 hours; timezone-aware (UTC)
    # so comparisons against stored timestamps don't mix naive/aware.
    self.time_threshold = datetime.now(pytz.utc) - timedelta(days=1)
class ReputationMonitor(object): def __init__(self): self.clusterer = KMeansClusterer(cluster_count=8) self.time_threshold = datetime.now(pytz.utc) - timedelta(days=1) # Nasty four-nested loop def scan(self): for entity in utility.entities_list: print 'Entity: %s' % entity # Get statistics for whole entity and write results to the database statistics_dict_whole = Statistics.get_stats( self.time_threshold, entity) Tweet_Stat_Table.objects.create( related_entity=entity, total_tweets_count=statistics_dict_whole['total_tweets_count'], negative_count=statistics_dict_whole['negative_count'], reputation_score=statistics_dict_whole['reputation_score']) print '\t Added stats for %s' % entity for reputation_dimension in utility.dimension_list: # Get statistics for each dimension of entities and write results to the database statistics_dict_dimension = Statistics.get_stats( self.time_threshold, entity, dimension=reputation_dimension) Tweet_Stat_Table.objects.create( related_entity=entity, reputation_dimension=reputation_dimension, total_tweets_count=statistics_dict_dimension[ 'total_tweets_count'], negative_count=statistics_dict_dimension['negative_count'], reputation_score=statistics_dict_dimension[ 'reputation_score']) print '\t Added stats for %s:%s' % (entity, reputation_dimension) # Clustering, extract topics and send out alerts # print '\t Dimension %s' % reputation_dimension try: self.clusterer.cluster_tweets( related_entity=entity, reputation_dimension=reputation_dimension, time_threshold=self.time_threshold) tweets_clusters = self.clusterer.get_tweets_clustered() for cluster in tweets_clusters: negative_count = 0 tweets_count = len(cluster) notify = False # Count negative tweets in each cluster for tweet_orm in cluster: if negative_count > tweets_count * 0.5: print negative_count, tweets_count notify = True break if utility.is_negative( tweet_orm.tweet['sentiment_score']): negative_count += 1 if notify: try: topic_extractor = LDATopicExtractor(cluster) topic_str = 
user_util.get_topics( topic_extractor.extract_topic()) print '\t\t cluster_topic: %s' % topic_str self.__notify__(entity, reputation_dimension, cluster, topic_str) except ValueError: print '\t\t No tweet in the cluster' except ValueError: print '\t No tweet for the %s' 's %s' % ( entity, reputation_dimension) @staticmethod @transaction.atomic def __notify__(entity, reputation_dimension, tweets_in_cluster, topic_str): # Construct a message message = Message(entity=entity, reputation_dimension=reputation_dimension, topic_str=topic_str) # Message must be saved before associating tweets to it message.save() for tweet in tweets_in_cluster: message.tweet.add(tweet) # For each user that is interested in the entity, add the message to their message set ue_pairs = UserEntity.objects.filter(entity=entity) for ue_pair in ue_pairs: um_pair = UserMessage.objects.create(user=ue_pair.user) um_pair.save() um_pair.message.add(message)
class ReputationMonitor(object): def __init__(self): self.clusterer = KMeansClusterer(cluster_count=8) self.time_threshold = datetime.now(pytz.utc) - timedelta(days=1) # Nasty four-nested loop def scan(self): for entity in utility.entities_list: print 'Entity: %s' % entity # Get statistics for whole entity and write results to the database statistics_dict_whole = Statistics.get_stats(self.time_threshold, entity) Tweet_Stat_Table.objects.create(related_entity=entity, total_tweets_count=statistics_dict_whole['total_tweets_count'], negative_count=statistics_dict_whole['negative_count'], reputation_score=statistics_dict_whole['reputation_score']) print '\t Added stats for %s' % entity for reputation_dimension in utility.dimension_list: # Get statistics for each dimension of entities and write results to the database statistics_dict_dimension = Statistics.get_stats(self.time_threshold, entity, dimension=reputation_dimension) Tweet_Stat_Table.objects.create(related_entity=entity, reputation_dimension=reputation_dimension, total_tweets_count=statistics_dict_dimension['total_tweets_count'], negative_count=statistics_dict_dimension['negative_count'], reputation_score=statistics_dict_dimension['reputation_score']) print '\t Added stats for %s:%s' % (entity, reputation_dimension) # Clustering, extract topics and send out alerts # print '\t Dimension %s' % reputation_dimension try: self.clusterer.cluster_tweets(related_entity=entity, reputation_dimension=reputation_dimension, time_threshold=self.time_threshold) tweets_clusters = self.clusterer.get_tweets_clustered() for cluster in tweets_clusters: negative_count = 0 tweets_count = len(cluster) notify = False # Count negative tweets in each cluster for tweet_orm in cluster: if negative_count > tweets_count * 0.5: print negative_count, tweets_count notify = True break if utility.is_negative(tweet_orm.tweet['sentiment_score']): negative_count += 1 if notify: try: topic_extractor = LDATopicExtractor(cluster) topic_str = 
user_util.get_topics(topic_extractor.extract_topic()) print '\t\t cluster_topic: %s' % topic_str self.__notify__(entity, reputation_dimension, cluster, topic_str) except ValueError: print '\t\t No tweet in the cluster' except ValueError: print '\t No tweet for the %s''s %s' % (entity, reputation_dimension) @staticmethod @transaction.atomic def __notify__(entity, reputation_dimension, tweets_in_cluster, topic_str): # Construct a message message = Message(entity=entity, reputation_dimension=reputation_dimension, topic_str=topic_str) # Message must be saved before associating tweets to it message.save() for tweet in tweets_in_cluster: message.tweet.add(tweet) # For each user that is interested in the entity, add the message to their message set ue_pairs = UserEntity.objects.filter(entity=entity) for ue_pair in ue_pairs: um_pair = UserMessage.objects.create(user=ue_pair.user) um_pair.save() um_pair.message.add(message)