def generate(self, from_date=None, to_date="Today", from_time=None, to_time="Now", hashtag_count=10):
    """Compute the most common hashtags among tweets in a date/time window.

    The window is selected from the arguments:

    * ``from_date == to_date == "Today"``: from yesterday's date up to
      today's date.
    * ``from_date`` is an int and ``to_date == "Today"``: from today plus
      ``from_date`` days (a negative value moves the start into the past)
      up to today.
    * ``from_time`` is an int and ``to_time == "Now"``: from now plus
      ``from_time`` hours up to now. This rule is evaluated last and
      therefore overrides a date window chosen above.

    When no rule matches, ``self.from_date`` / ``self.to_date`` are not
    assigned here and the query uses whatever they already hold.

    Results are stored on the instance:

    * ``self.all_tweets_count``: number of matching tweets.
    * ``self.hashtags``: ``Counter.most_common(hashtag_count)`` pairs. Every
      occurrence of a hashtag contributes
      ``self.hashtag_user_count(all_hashtags, title)`` to that title's total.
    """
    if from_time:
        self.f_time = abs(from_time)

    if from_date and to_date:
        today = datetime.date.today()
        if from_date == to_date and from_date == "Today":
            self.from_date = today - datetime.timedelta(1)
            self.to_date = today
        elif isinstance(from_date, int) and to_date == "Today":
            self.from_date = today + datetime.timedelta(from_date)
            self.to_date = today

    if from_time and to_time:
        if isinstance(from_time, int) and to_time == "Now":
            now = datetime.datetime.now()
            self.from_date = now + datetime.timedelta(hours=from_time)
            self.to_date = now

    def _localize(moment):
        # date.replace() accepts only year/month/day, so calling it with
        # tzinfo on the plain dates stored by the date-based windows raised
        # TypeError. Promote those to midnight datetimes for the query; the
        # attributes themselves keep the type they had before.
        if not isinstance(moment, datetime.datetime):
            moment = datetime.datetime.combine(moment, datetime.time.min)
        return moment.replace(tzinfo=tz.tzlocal())

    all_tweets = Analysis.objects(
        Q(create_date__lt=_localize(self.to_date))
        & Q(create_date__gte=_localize(self.from_date))
        & Q(hashtags__ne=[])
    ).all()
    self.all_tweets_count = len(all_tweets)

    all_hashtags = []
    for item in all_tweets:
        for hashtag in item.hashtags:
            all_hashtags.append({
                'title': self.remove_ar(hashtag['text']),
                'user': item.user_id,
            })

    # Every occurrence of a title adds that title's user count to its total.
    # The helper is asked once per distinct title instead of once per
    # occurrence.
    # NOTE(review): assumes hashtag_user_count is a pure function of its
    # arguments and returns an int - confirm against its definition.
    user_counts = {}
    tally = Counter()
    for entry in all_hashtags:
        title = entry['title']
        if title not in user_counts:
            user_counts[title] = self.hashtag_user_count(all_hashtags, title)
        weight = user_counts[title]
        # Non-positive weights contributed nothing to the old repeated list,
        # so they must not create a zero-count entry here either.
        if weight > 0:
            tally[title] += weight

    self.hashtags = tally.most_common(hashtag_count)
def generate(self, from_date=None, to_date="Today", from_time=None, to_time="Now", emoji_count=10):
    """Compute the most common emoji among tweets in a date/time window.

    The window is selected from the arguments:

    * ``from_date == to_date == "Today"``: from yesterday's date up to
      today's date.
    * ``from_date`` is an int and ``to_date == "Today"``: from today plus
      ``from_date`` days (a negative value moves the start into the past)
      up to today.
    * ``from_time`` is an int and ``to_time == "Now"``: from now plus
      ``from_time`` hours up to now. This rule is evaluated last and
      therefore overrides a date window chosen above.

    When no rule matches, ``self.from_date`` / ``self.to_date`` are not
    assigned here and the query uses whatever they already hold.

    Results are stored on the instance:

    * ``self.all_tweets_count``: number of matching tweets.
    * ``self.emoji``: ``Counter.most_common(emoji_count)`` pairs. Every
      occurrence of an emoji contributes
      ``self.emoji_user_count(all_emoji, title)`` to that emoji's total.
    """
    if from_time:
        self.f_time = abs(from_time)

    if from_date and to_date:
        today = datetime.date.today()
        if from_date == to_date and from_date == "Today":
            self.from_date = today - datetime.timedelta(1)
            self.to_date = today
        elif isinstance(from_date, int) and to_date == "Today":
            self.from_date = today + datetime.timedelta(from_date)
            self.to_date = today

    if from_time and to_time:
        if isinstance(from_time, int) and to_time == "Now":
            now = datetime.datetime.now()
            self.from_date = now + datetime.timedelta(hours=from_time)
            self.to_date = now

    def _localize(moment):
        # date.replace() accepts only year/month/day, so calling it with
        # tzinfo on the plain dates stored by the date-based windows raised
        # TypeError. Promote those to midnight datetimes for the query; the
        # attributes themselves keep the type they had before.
        if not isinstance(moment, datetime.datetime):
            moment = datetime.datetime.combine(moment, datetime.time.min)
        return moment.replace(tzinfo=tz.tzlocal())

    all_tweets = Analysis.objects(
        Q(create_date__lt=_localize(self.to_date))
        & Q(create_date__gte=_localize(self.from_date))
    ).all()
    self.all_tweets_count = len(all_tweets)

    # One entry per emoji character found in a tweet's text.
    all_emoji = []
    for item in all_tweets:
        for char in item.text:
            if char in emoji.UNICODE_EMOJI:
                all_emoji.append({"title": char, "user": item.user_id})

    # Every occurrence of an emoji adds that emoji's user count to its total.
    # The helper is asked once per distinct emoji instead of once per
    # occurrence.
    # NOTE(review): assumes emoji_user_count is a pure function of its
    # arguments and returns an int - confirm against its definition.
    user_counts = {}
    tally = Counter()
    for entry in all_emoji:
        title = entry['title']
        if title not in user_counts:
            user_counts[title] = self.emoji_user_count(all_emoji, title)
        weight = user_counts[title]
        # Non-positive weights contributed nothing to the old repeated list,
        # so they must not create a zero-count entry here either.
        if weight > 0:
            tally[title] += weight

    self.emoji = tally.most_common(emoji_count)
def generate(self, from_date=None, to_date="Today", from_time=None, to_time="Now"):
    """Render a bar chart and a pie chart of tweet counts per time bucket.

    Bucket edges are stored in ``self.date_list``:

    * ``from_date == to_date == "Today"``: daily edges for the 24 days
      leading up to today.
    * ``from_date`` is an int and ``to_date == "Today"``: daily edges from
      today plus ``from_date`` days (a negative value looks back) to today.
    * ``from_time`` is an int and ``to_time == "Now"``: hourly edges from
      now plus ``from_time`` hours to now, in the local time zone. This
      rule is evaluated last and overrides the date-based edges.

    When no rule matches, ``self.date_list`` is not assigned here and its
    existing value is used.

    For each pair of consecutive edges the method counts, via ``Analysis``
    queries: records with ``retweet_count == 0``, quotes, records with
    ``retweet_count > 0``, records with user mentions, records with media,
    and all records. The bar chart shows the per-bucket counts; the pie
    chart shows each category as a percentage of all records in the window.

    Side effects: sets ``self.from_time`` / ``self.from_date`` (absolute
    value of numeric arguments), ``self.to_date``, ``self.date_list`` and
    ``self.all_tweets_count``; writes two PNG files under
    ``<self.d>/tmp/`` and appends their paths to ``self.file_names``.
    """
    # abs() on the documented "Today" string raises TypeError, which made the
    # "Today" branch below unreachable; only non-string offsets are normalised.
    if from_time and not isinstance(from_time, str):
        self.from_time = abs(from_time)
    if from_date and not isinstance(from_date, str):
        self.from_date = abs(from_date)

    if from_date and to_date:
        if from_date == to_date and from_date == "Today":
            self.to_date = datetime.date.today()
            # Offsets are negative, so they are added (as in the int branch)
            # to get ascending edges that end today. Subtracting them gave
            # descending future dates and therefore only empty buckets.
            self.date_list = [
                self.to_date + datetime.timedelta(x) for x in range(-24, 1)
            ]
        elif isinstance(from_date, int) and to_date == "Today":
            self.to_date = datetime.date.today()
            self.date_list = [
                self.to_date + datetime.timedelta(x) for x in range(from_date, 1)
            ]

    if from_time and to_time:
        if isinstance(from_time, int) and to_time == "Now":
            self.to_date = datetime.datetime.now().replace(tzinfo=tz.tzlocal())
            self.date_list = [
                (self.to_date + datetime.timedelta(hours=x)).replace(tzinfo=tz.tzlocal())
                for x in range(from_time, 1)
            ]

    def _count(start, end, extra=None):
        # Number of records created in [start, end), optionally narrowed by
        # one more Q filter.
        window = Q(create_date__gte=start) & Q(create_date__lt=end)
        if extra is not None:
            window = window & extra
        return Analysis.objects(window).all().count()

    tw_count = []
    quotes_count = []
    retweet_count = []
    all_tweet_count = []
    all_mention_count = []
    all_media_count = []

    # Consecutive edges form the buckets; the last edge only closes the
    # final bucket.
    for start, end in zip(self.date_list, self.date_list[1:]):
        tw_count.append(_count(start, end, Q(retweet_count=0)))
        quotes_count.append(_count(start, end, Q(is_quote_status=True)))
        retweet_count.append(_count(start, end, Q(retweet_count__gt=0)))
        all_mention_count.append(_count(start, end, Q(user_mentions__ne=[])))
        all_media_count.append(_count(start, end, Q(media_type__ne='')))
        all_tweet_count.append(_count(start, end))

    def _label(edge):
        # Jalali-calendar axis label; datetimes also show the time of day.
        if isinstance(edge, datetime.datetime):
            # %M is minutes; the previous %m repeated the month here.
            return jdatetime.datetime.fromgregorian(datetime=edge).strftime(
                '%m/%d - %H:%M ')
        return jdatetime.date.fromgregorian(date=edge).strftime('%a %m/%d')

    date_chart = pygal.Bar(margin=100, height=1000, width=1000, x_label_rotation=90)
    date_chart.x_labels = [_label(edge) for edge in self.date_list[:-1]]
    date_chart.title = 'Count of ALL'
    date_chart.add("all_tweet_count", all_tweet_count)
    date_chart.add("tw", tw_count)
    date_chart.add("retweet", retweet_count)
    date_chart.add("quotes", quotes_count)
    date_chart.add("mention", all_mention_count)
    date_chart.add("all_media", all_media_count)

    # Pie chart: share of each category among all records in the window.
    self.all_tweets_count = sum(all_tweet_count)
    total = self.all_tweets_count

    def _percent(counts):
        # An empty window yields 0 % instead of a ZeroDivisionError.
        if not total:
            return 0.0
        return 100 * float(sum(counts)) / float(total)

    pie_chart = pygal.Pie(inner_radius=.4)
    pie_chart.title = 'From All - More than 100% - {} tweet'.format(total)
    for caption, counts in (
            ('tw', tw_count),
            ('quotes', quotes_count),
            ('retweet', retweet_count),
            ('mention', all_mention_count),
            ('media', all_media_count),
    ):
        share = _percent(counts)
        pie_chart.add('{0} {1:.2f} %'.format(caption, share), share)

    # %M (minutes) keeps files rendered within the same hour distinct; the
    # previous %m repeated the month.
    filename = datetime.datetime.today().strftime('%Y-%m-%d-%H:%M')
    bar_path = path.join(self.d, 'tmp/' + filename + '-chart.png')
    pie_path = path.join(self.d, 'tmp/' + filename + '-pie-chart.png')

    date_chart.render_to_png(bar_path, dpi=600)
    self.file_names.append(bar_path)
    pie_chart.render_to_png(pie_path, dpi=300)
    self.file_names.append(pie_path)
def save_tweet(data_json):
    """Persist one tweet payload as an ``Analysis`` record.

    Payloads without a truthy ``'text'`` are ignored.

    Retweets (payloads with a truthy ``'retweeted_status'``) are keyed by
    the id of the retweeted tweet. If no record with that id exists, one is
    created from the payload with the retweet/favorite counters of the
    retweeted tweet; otherwise only those two counters are updated on the
    existing record(s).

    Other tweets are keyed by their own id and stored once, with both
    counters set to 0; a payload whose id is already stored is skipped.

    ``clean_text`` is only computed for tweets without user mentions (for
    retweets, mentions of the retweeted tweet decide).

    :param data_json: decoded tweet payload (a dict).
    :return: always ``None``.
    """
    if not data_json.get('text'):
        return

    time_format = '%a %b %d %H:%M:%S +0000 %Y'

    def _store(tweet_id, retweet_count, favorite_count, with_clean_text, with_mentions):
        # Copy the fields shared by every variant from the payload and save.
        user = data_json['user']
        entities = data_json['entities']

        tweet = Analysis()
        tweet.tweet_id = tweet_id
        tweet.text = data_json['text']
        if with_clean_text:
            tweet.clean_text = FetchText.generate(data_json['text'])
        tweet.user_name = user['name']
        tweet.user_id = user['id']
        tweet.user_screen_name = user['screen_name']
        tweet.user_location = user['location']
        tweet.user_created_at = datetime.datetime.strptime(user['created_at'], time_format)
        tweet.user_description = user['description']
        tweet.user_followers_count = user['followers_count']
        tweet.user_friends_count = user['friends_count']
        tweet.user_statuses_count = user['statuses_count']
        tweet.user_favourites_count = user['favourites_count']
        tweet.create_date_timestamp_ms = data_json['timestamp_ms']
        tweet.create_date = datetime.datetime.strptime(data_json['created_at'], time_format)
        # The client name is the anchor text of 'source'. Fall back to the
        # raw value when it is not an anchor, instead of raising IndexError
        # and losing the tweet.
        anchor_text = re.findall(r'<a .*>(.*)</a>', data_json['source'])
        tweet.source = anchor_text[0] if anchor_text else data_json['source']
        tweet.is_quote_status = data_json['is_quote_status']
        tweet.media_type = entities['media'] if entities.get('media') else ''
        tweet.retweet_count = retweet_count
        tweet.favorite_count = favorite_count
        if with_mentions:
            tweet.user_mentions = (
                entities['user_mentions'] if entities.get('user_mentions') else [])
        tweet.hashtags = entities['hashtags'] if entities.get('hashtags') else []
        tweet.save()

    retweeted = data_json.get('retweeted_status')
    if retweeted:
        retweeted_id = retweeted['id']
        tw_object = Analysis.objects(tweet_id=retweeted_id)
        if tw_object.count() == 0:
            has_mentions = bool(retweeted['entities'].get('user_mentions'))
            # Retweets without mentions never had user_mentions assigned;
            # that is preserved via with_mentions.
            _store(retweeted_id,
                   retweeted['retweet_count'],
                   retweeted['favorite_count'],
                   with_clean_text=not has_mentions,
                   with_mentions=has_mentions)
        else:
            # Already stored: refresh the counters only. Previously a retweet
            # with mentions skipped the existence check and was inserted
            # again on every sighting.
            tw_object.update(retweet_count=retweeted['retweet_count'],
                             favorite_count=retweeted['favorite_count'])
        return

    # Plain tweet: store it once, keyed by its own id.
    if Analysis.objects(tweet_id=data_json['id']).count() != 0:
        return
    has_mentions = bool(data_json['entities'].get('user_mentions'))
    _store(data_json['id'], 0, 0,
           with_clean_text=not has_mentions,
           with_mentions=True)