Ejemplo n.º 1
0
    def generate(self, from_date=None, to_date="Today", from_time=None, to_time="Now", hashtag_count=10):
        """Rank the hashtags used by stored tweets inside a time window.

        The window is resolved from the arguments and kept on
        ``self.from_date`` / ``self.to_date``:

        * ``from_date == to_date == "Today"``: from yesterday up to today.
        * integer ``from_date`` with ``to_date == "Today"``: from today plus
          ``from_date`` days up to today (a negative value looks back).
        * integer ``from_time`` with ``to_time == "Now"``: from now plus
          ``from_time`` hours up to now; when it matches it overrides a
          date based window.

        When no combination matches, the values already present on
        ``self.from_date`` / ``self.to_date`` are used as they are.

        :param from_date: ``"Today"`` or an integer day offset.
        :param to_date: only ``"Today"`` is recognised.
        :param from_time: integer hour offset; its absolute value is also
            stored on ``self.f_time``.
        :param to_time: only ``"Now"`` is recognised.
        :param hashtag_count: number of entries kept in ``self.hashtags``.

        Sets ``self.all_tweets_count`` to the number of matching tweets and
        ``self.hashtags`` to a list of ``(title, weight)`` pairs, heaviest
        first. Every hashtag occurrence adds the value returned by
        ``self.hashtag_user_count`` for its title to that title's weight.
        """

        def as_local(moment):
            # ``date.replace`` does not accept ``tzinfo``; a plain date is
            # promoted to midnight of that day before the zone is attached.
            if not isinstance(moment, datetime.datetime):
                moment = datetime.datetime.combine(moment, datetime.time.min)
            return moment.replace(tzinfo=tz.tzlocal())

        if from_time:
            self.f_time = abs(from_time)

        if from_date and to_date:
            today = datetime.date.today()
            if from_date == to_date and from_date == "Today":
                self.from_date = today - datetime.timedelta(1)
                self.to_date = today
            elif isinstance(from_date, int) and to_date == "Today":
                self.from_date = today + datetime.timedelta(from_date)
                self.to_date = today

        if from_time and to_time:
            if isinstance(from_time, int) and to_time == "Now":
                self.from_date = datetime.datetime.now() + datetime.timedelta(hours=from_time)
                self.to_date = datetime.datetime.now()

        all_tweets = Analysis.objects(
            Q(create_date__lt=as_local(self.to_date))
            & Q(create_date__gte=as_local(self.from_date))
            & Q(hashtags__ne=[])
        ).all()
        self.all_tweets_count = len(all_tweets)

        # One entry per hashtag occurrence, remembering who used it.
        all_hashtags = [
            {
                'title': self.remove_ar(hashtag['text']),
                'user': item.user_id
            }
            for item in all_tweets
            for hashtag in item.hashtags
        ]

        # Accumulate the weights directly instead of materialising a list
        # that repeats every title ``weight`` times. Non-positive weights
        # contribute nothing, exactly like ``[title] * weight`` did.
        count_all = Counter()
        for entry in all_hashtags:
            weight = self.hashtag_user_count(all_hashtags, entry['title'])
            if weight > 0:
                count_all[entry['title']] += weight
        self.hashtags = count_all.most_common(hashtag_count)
Ejemplo n.º 2
0
    def generate(self, from_date=None, to_date="Today", from_time=None, to_time="Now", emoji_count=10):
        """Rank the emoji used by stored tweets inside a time window.

        The window is resolved from the arguments and kept on
        ``self.from_date`` / ``self.to_date``:

        * ``from_date == to_date == "Today"``: from yesterday up to today.
        * integer ``from_date`` with ``to_date == "Today"``: from today plus
          ``from_date`` days up to today (a negative value looks back).
        * integer ``from_time`` with ``to_time == "Now"``: from now plus
          ``from_time`` hours up to now; when it matches it overrides a
          date based window.

        When no combination matches, the values already present on
        ``self.from_date`` / ``self.to_date`` are used as they are.

        :param from_date: ``"Today"`` or an integer day offset.
        :param to_date: only ``"Today"`` is recognised.
        :param from_time: integer hour offset; its absolute value is also
            stored on ``self.f_time``.
        :param to_time: only ``"Now"`` is recognised.
        :param emoji_count: number of entries kept in ``self.emoji``.

        Sets ``self.all_tweets_count`` to the number of matching tweets and
        ``self.emoji`` to a list of ``(emoji, weight)`` pairs, heaviest
        first. Every emoji occurrence adds the value returned by
        ``self.emoji_user_count`` for that emoji to its weight.
        """

        def as_local(moment):
            # ``date.replace`` does not accept ``tzinfo``; a plain date is
            # promoted to midnight of that day before the zone is attached.
            if not isinstance(moment, datetime.datetime):
                moment = datetime.datetime.combine(moment, datetime.time.min)
            return moment.replace(tzinfo=tz.tzlocal())

        if from_time:
            self.f_time = abs(from_time)

        if from_date and to_date:
            today = datetime.date.today()
            if from_date == to_date and from_date == "Today":
                self.from_date = today - datetime.timedelta(1)
                self.to_date = today
            elif isinstance(from_date, int) and to_date == "Today":
                self.from_date = today + datetime.timedelta(from_date)
                self.to_date = today

        if from_time and to_time:
            if isinstance(from_time, int) and to_time == "Now":
                self.from_date = datetime.datetime.now() + datetime.timedelta(hours=from_time)
                self.to_date = datetime.datetime.now()

        all_tweets = Analysis.objects(
            Q(create_date__lt=as_local(self.to_date))
            & Q(create_date__gte=as_local(self.from_date))
        ).all()
        self.all_tweets_count = len(all_tweets)

        # NOTE(review): the membership test below assumes UNICODE_EMOJI is
        # keyed by emoji characters - confirm against the installed version
        # of the emoji package.
        known_emoji = emoji.UNICODE_EMOJI

        # One entry per emoji character found in a tweet text, remembering
        # who used it.
        all_emoji = [
            {
                "title": symbol,
                "user": item.user_id
            }
            for item in all_tweets
            for symbol in item.text
            if symbol in known_emoji
        ]

        # Accumulate the weights directly instead of materialising a list
        # that repeats every emoji ``weight`` times. Non-positive weights
        # contribute nothing, exactly like ``[title] * weight`` did.
        count_all = Counter()
        for entry in all_emoji:
            weight = self.emoji_user_count(all_emoji, entry['title'])
            if weight > 0:
                count_all[entry['title']] += weight
        self.emoji = count_all.most_common(emoji_count)
Ejemplo n.º 3
0
    def generate(self,
                 from_date=None,
                 to_date="Today",
                 from_time=None,
                 to_time="Now"):
        """Render a bar chart and a pie chart of tweet activity over time.

        The time axis is a list of bucket boundaries kept on
        ``self.date_list``:

        * ``from_date == to_date == "Today"``: daily boundaries for the 24
          days leading up to today.
        * integer ``from_date`` with ``to_date == "Today"``: daily
          boundaries from today plus ``from_date`` days up to today (a
          negative value looks back).
        * integer ``from_time`` with ``to_time == "Now"``: hourly boundaries
          from now plus ``from_time`` hours up to now; when it matches it
          overrides a date based axis.

        When no combination matches, the existing ``self.date_list`` is used.

        For every pair of consecutive boundaries the stored tweets are
        counted in six categories (all, not retweeted, retweeted, quotes,
        with mentions, with media). The per bucket counts go into the bar
        chart; the share of each category among all tweets goes into the
        pie chart. The categories overlap, so the shares can add up to more
        than 100%.

        Side effects: sets ``self.all_tweets_count``, writes two PNG files
        below ``<self.d>/tmp/`` and appends both paths to
        ``self.file_names``.
        """
        if from_time:
            self.from_time = abs(from_time)
        # ``abs`` only makes sense for numbers; the "Today" keyword must not
        # reach it, otherwise the call fails before the axis is built.
        if from_date and isinstance(from_date, (int, float)):
            self.from_date = abs(from_date)

        if from_date and to_date:
            if from_date == to_date and from_date == "Today":
                self.to_date = datetime.date.today()
                # Offsets are negative, so they are added to walk from the
                # past up to today in ascending order.
                self.date_list = [self.to_date + datetime.timedelta(x)
                                  for x in range(-24, 1)]
            elif isinstance(from_date, int) and to_date == "Today":
                self.to_date = datetime.date.today()
                self.date_list = [self.to_date + datetime.timedelta(x)
                                  for x in range(from_date, 1)]

        if from_time and to_time:
            if isinstance(from_time, int) and to_time == "Now":
                self.to_date = datetime.datetime.now().replace(
                    tzinfo=tz.tzlocal())
                self.date_list = [
                    (self.to_date +
                     datetime.timedelta(hours=x)).replace(tzinfo=tz.tzlocal())
                    for x in range(from_time, 1)
                ]

        def count_between(start, end, **extra):
            # Number of stored tweets created in [start, end) that also
            # satisfy the optional extra condition.
            window = (Q(create_date__gte=start)
                      & Q(create_date__lt=end))
            if extra:
                window = window & Q(**extra)
            return Analysis.objects(window).count()

        tw_count = []
        quotes_count = []
        retweet_count = []
        all_tweet_count = []
        all_mention_count = []
        all_media_count = []
        # Each pair of neighbouring boundaries is one bucket.
        for start, end in zip(self.date_list, self.date_list[1:]):
            tw_count.append(count_between(start, end, retweet_count=0))
            quotes_count.append(
                count_between(start, end, is_quote_status=True))
            retweet_count.append(
                count_between(start, end, retweet_count__gt=0))
            all_mention_count.append(
                count_between(start, end, user_mentions__ne=[]))
            all_media_count.append(
                count_between(start, end, media_type__ne=''))
            all_tweet_count.append(count_between(start, end))

        def bucket_label(moment):
            # Hourly buckets show the Jalali date and time, daily buckets
            # the Jalali weekday and date.
            if isinstance(moment, datetime.datetime):
                return jdatetime.datetime.fromgregorian(
                    datetime=moment).strftime('%m/%d - %H:%M ')
            return jdatetime.date.fromgregorian(
                date=moment).strftime('%a %m/%d')

        date_chart = pygal.Bar(margin=100,
                               height=1000,
                               width=1000,
                               x_label_rotation=90)
        # The last boundary only closes the final bucket and gets no label.
        date_chart.x_labels = [bucket_label(moment)
                               for moment in self.date_list[:-1]]
        date_chart.title = 'Count  of ALL'
        date_chart.add("all_tweet_count", all_tweet_count)
        date_chart.add("tw", tw_count)
        date_chart.add("retweet", retweet_count)
        date_chart.add("quotes", quotes_count)
        date_chart.add("mention", all_mention_count)
        date_chart.add("all_media", all_media_count)

        # create pie chart
        self.all_tweets_count = sum(all_tweet_count)
        total = float(self.all_tweets_count)

        def share(counts):
            # Percentage of all tweets; an empty window yields 0 instead of
            # dividing by zero.
            return 100 * sum(counts) / total if total else 0.0

        pie_chart = pygal.Pie(inner_radius=.4)
        pie_chart.title = 'From All  - More than 100% - {} tweet'.format(
            self.all_tweets_count)
        for label_format, counts in (
                ('tw {0:.2f} %', tw_count),
                ('quotes {0:.2f} %', quotes_count),
                ('retweet {0:.2f} %', retweet_count),
                ('mention {0:.2f} %', all_mention_count),
                ('media {0:.2f} %', all_media_count)):
            percent = share(counts)
            pie_chart.add(label_format.format(percent), percent)

        # create file
        filename = datetime.datetime.today().strftime('%Y-%m-%d-%H:%M')
        chart_path = path.join(self.d, 'tmp/' + filename + '-chart.png')
        pie_path = path.join(self.d, 'tmp/' + filename + '-pie-chart.png')
        date_chart.render_to_png(chart_path, dpi=600)
        self.file_names.append(chart_path)
        pie_chart.render_to_png(pie_path, dpi=300)
        self.file_names.append(pie_path)
Ejemplo n.º 4
0
    def save_tweet(data_json):
        """Store one tweet payload as an ``Analysis`` document.

        Payloads without a ``text`` value are ignored.

        Retweets (payloads carrying ``retweeted_status``) are filed under
        the id of the retweeted tweet:

        * the retweeted tweet has user mentions: a new document is saved;
        * otherwise, no document with that id exists yet: a new document is
          saved;
        * otherwise: ``retweet_count`` and ``favorite_count`` of the
          existing documents are updated from ``retweeted_status``.

        Any other payload is saved under its own id unless a document with
        that id already exists, in which case nothing happens.

        ``clean_text`` is only computed for documents saved through a
        "no mentions" branch.

        :param data_json: decoded tweet payload (a dict).
        :return: always ``None``.
        """
        if not data_json.get('text'):
            return

        twitter_time = '%a %b %d %H:%M:%S +0000 %Y'

        def store(tweet_id, counts_from, clean, keep_mentions):
            # Build and save one document from the payload.
            #   counts_from   -- dict supplying retweet/favorite counts, or
            #                    None to store zeros
            #   clean         -- also compute ``clean_text``
            #   keep_mentions -- copy ``user_mentions`` from the payload
            user = data_json['user']
            entities = data_json['entities']

            tweet = Analysis()
            tweet.tweet_id = tweet_id
            tweet.text = data_json['text']
            if clean:
                tweet.clean_text = FetchText.generate(data_json['text'])
            tweet.user_name = user['name']
            tweet.user_id = user['id']
            tweet.user_screen_name = user['screen_name']
            tweet.user_location = user['location']
            tweet.user_created_at = datetime.datetime.strptime(
                user['created_at'], twitter_time)
            tweet.user_description = user['description']
            tweet.user_followers_count = user['followers_count']
            tweet.user_friends_count = user['friends_count']
            tweet.user_statuses_count = user['statuses_count']
            tweet.user_favourites_count = user['favourites_count']
            tweet.create_date_timestamp_ms = data_json['timestamp_ms']
            tweet.create_date = datetime.datetime.strptime(
                data_json['created_at'], twitter_time)
            # The client name is the text of the anchor in ``source``; keep
            # the raw value when the field holds no anchor markup.
            anchor = re.search(r'<a .*>(.*)</a>', data_json['source'])
            tweet.source = anchor.group(1) if anchor else data_json['source']
            tweet.is_quote_status = data_json['is_quote_status']
            tweet.media_type = entities.get('media') or ''
            tweet.retweet_count = counts_from['retweet_count'] if counts_from else 0
            tweet.favorite_count = counts_from['favorite_count'] if counts_from else 0
            if keep_mentions:
                tweet.user_mentions = entities.get('user_mentions') or []
            tweet.hashtags = entities.get('hashtags') or []
            tweet.save()

        retweeted = data_json.get('retweeted_status')
        if retweeted:
            retweeted_id = retweeted['id']
            tw_object = Analysis.objects(tweet_id=retweeted_id)
            if retweeted['entities'].get('user_mentions'):
                # NOTE(review): this branch saves a new document for every
                # retweet, even when one with this id already exists -
                # confirm that this is intended.
                store(retweeted_id, retweeted, clean=False, keep_mentions=True)
            elif tw_object.count() == 0:
                store(retweeted_id, retweeted, clean=True, keep_mentions=False)
            else:
                tw_object.update(retweet_count=retweeted['retweet_count'],
                                 favorite_count=retweeted['favorite_count'])
            return

        # Not a retweet: save once per tweet id.
        if Analysis.objects(tweet_id=data_json['id']).count() != 0:
            return
        has_mentions = bool(data_json['entities'].get('user_mentions'))
        store(data_json['id'], None, clean=not has_mentions, keep_mentions=True)
        return