def run(self, bot):
        self.logger.debug("Fetching tweets...")
        tweet_rows = []
        # fetch the tw users' tweets
        tw_users = list((TwitterUser.select().join(Subscription).group_by(
            TwitterUser).order_by(TwitterUser.last_fetched)))
        updated_tw_users = []
        users_to_cleanup = []

        for tw_user in tw_users:
            try:
                if tw_user.last_tweet_id == 0:
                    # get just the latest tweet
                    self.logger.debug("Fetching latest tweet by {}".format(
                        tw_user.screen_name))
                    tweets = bot.tw.user_timeline(
                        screen_name=tw_user.screen_name, count=1)
                else:
                    # get the fresh tweets
                    self.logger.debug("Fetching new tweets from {}".format(
                        tw_user.screen_name))
                    tweets = bot.tw.user_timeline(
                        screen_name=tw_user.screen_name,
                        since_id=tw_user.last_tweet_id)
                updated_tw_users.append(tw_user)
            except tweepy.error.TweepError as e:
                sc = e.response.status_code
                if sc == 429:
                    self.logger.debug("- Hit ratelimit, breaking.")
                    break

                if sc == 401:
                    users_to_cleanup.append((tw_user, 'PROTECTED'))
                    self.logger.debug(
                        "- Protected tweets here. Cleaning up this user")
                    continue

                if sc == 404:
                    users_to_cleanup.append((tw_user, 'NOTFOUND'))
                    self.logger.debug(
                        "- 404? Maybe screen name changed? Cleaning up this user"
                    )
                    continue

                self.logger.debug(
                    "- Unknown exception, Status code {}".format(sc))
                continue

            for tweet in tweets:
                self.logger.debug("- Got tweet: {}".format(tweet.text))

                # Check if tweet contains media, else check if it contains a link to an image
                extensions = ('.jpg', '.jpeg', '.png', '.gif')
                pattern = '[(%s)]$' % ')('.join(extensions)
                photo_url = ''
                tweet_text = html.unescape(tweet.text)
                if 'media' in tweet.entities:
                    photo_url = tweet.entities['media'][0]['media_url_https']
                else:
                    for url_entity in tweet.entities['urls']:
                        expanded_url = url_entity['expanded_url']
                        if re.search(pattern, expanded_url):
                            photo_url = expanded_url
                            break
                if photo_url:
                    self.logger.debug("- - Found media URL in tweet: " +
                                      photo_url)

                for url_entity in tweet.entities['urls']:
                    expanded_url = url_entity['expanded_url']
                    indices = url_entity['indices']
                    display_url = tweet.text[indices[0]:indices[1]]
                    tweet_text = tweet_text.replace(display_url, expanded_url)

                tw_data = {
                    'tw_id': tweet.id,
                    'text': tweet_text,
                    'created_at': tweet.created_at,
                    'twitter_user': tw_user,
                    'photo_url': photo_url,
                }
                try:
                    t = Tweet.get(Tweet.tw_id == tweet.id)
                    self.logger.warning("Got duplicated tw_id on this tweet:")
                    self.logger.warning(str(tw_data))
                except Tweet.DoesNotExist:
                    if tw_data['text'][0] != '@':
                        tweet_rows.append(tw_data)

                if len(tweet_rows) >= self.TWEET_BATCH_INSERT_COUNT:
                    Tweet.insert_many(tweet_rows).execute()
                    tweet_rows = []

        TwitterUser.update(last_fetched=datetime.now()) \
            .where(TwitterUser.id << [tw.id for tw in updated_tw_users]).execute()

        if not updated_tw_users:
            return

        if tweet_rows:
            Tweet.insert_many(tweet_rows).execute()

        # send the new tweets to subscribers
        subscriptions = list(Subscription.select().where(
            Subscription.tw_user << updated_tw_users))
        for s in subscriptions:
            # are there new tweets? send em all!
            self.logger.debug("Checking subscription {} {}".format(
                s.tg_chat.chat_id, s.tw_user.screen_name))

            if s.last_tweet_id == 0:  # didn't receive any tweet yet
                try:
                    tw = s.tw_user.tweets.select() \
                        .order_by(Tweet.tw_id.desc()) \
                        .first()
                    if tw is None:
                        self.logger.warning(
                            "Something fishy is going on here...")
                    else:
                        bot.send_tweet(s.tg_chat, tw)
                        # save the latest tweet sent on this subscription
                        s.last_tweet_id = tw.tw_id
                        s.save()
                except IndexError:
                    self.logger.debug("- No tweets available yet on {}".format(
                        s.tw_user.screen_name))

                continue

            if s.tw_user.last_tweet_id > s.last_tweet_id:
                self.logger.debug("- Some fresh tweets here!")
                for tw in (s.tw_user.tweets.select().where(
                        Tweet.tw_id > s.last_tweet_id).order_by(
                            Tweet.tw_id.asc())):
                    bot.send_tweet(s.tg_chat, tw)

                # save the latest tweet sent on this subscription
                s.last_tweet_id = s.tw_user.last_tweet_id
                s.save()
                continue

            self.logger.debug("- No new tweets here.")

        self.logger.debug("Starting tw_user cleanup")
        if not users_to_cleanup:
            self.logger.debug("- Nothing to cleanup")
        else:
            for tw_user, reason in users_to_cleanup:
                self.logger.debug("- Cleaning up subs on user @{}, {}".format(
                    tw_user.screen_name, reason))
                message = INFO_CLEANUP[reason].format(tw_user.screen_name)
                subs = list(tw_user.subscriptions)
                for s in subs:
                    chat = s.tg_chat
                    if chat.delete_soon:
                        self.logger.debug(
                            "- - skipping because of delete_soon chatid={}".
                            format(chat_id))
                        continue
                    chat_id = chat.chat_id
                    self.logger.debug("- - bye on chatid={}".format(chat_id))
                    s.delete_instance()

                    try:
                        bot.sendMessage(chat_id=chat_id, text=message)
                    except TelegramError as e:
                        self.logger.info(
                            "Couldn't send unsubscription notice of {} to chat {}: {}"
                            .format(tw_user.screen_name, chat_id, e.message))

                        delet_this = None

                        if e.message == 'Bad Request: group chat was migrated to a supergroup chat':
                            delet_this = True

                        if e.message == "Unauthorized":
                            delet_this = True

                        if delet_this:
                            self.logger.info("Marking chat for deletion")
                            chat.delete_soon = True
                            chat.save()

            self.logger.debug("- Cleaning up TwitterUser @{}".format(
                tw_user.screen_name, reason))
            tw_user.delete_instance()

            self.logger.debug("- Cleanup finished")

        self.logger.debug("Cleaning up TelegramChats marked for deletion")
        for chat in TelegramChat.select().where(
                TelegramChat.delete_soon == True):
            chat.delete_instance(recursive=True)
            self.logger.debug("Deleting chat {}".format(chat.chat_id))
Example #2
0
def FetchAndSendTweetsJob(context_in: CallbackContext) -> None:
    job = context_in.job
    bot = context_in.bot
    job.repeat = True
    job.context = None
    job.name = "FetchAndSendTweetsJob"
    job._remove = Event()
    job._enabled = Event()
    job._enabled.set()
    job.logger = logging.getLogger(job.name)
    job.logger.debug("Fetching tweets...")
    tweet_rows = []
    # fetch the tw users' tweets
    tw_users = list((
        TwitterUser.select().join(Subscription).group_by(TwitterUser).order_by(
            TwitterUser.last_fetched)))
    updated_tw_users = []
    users_to_cleanup = []

    for tw_user in tw_users:
        try:
            if tw_user.last_tweet_id == 0:
                # get just the latest tweet
                job.logger.debug("Fetching latest tweet by {}".format(
                    tw_user.screen_name))
                tweets = bot.tw.user_timeline(screen_name=tw_user.screen_name,
                                              count=1,
                                              tweet_mode='extended')
            else:
                # get the fresh tweets
                job.logger.debug("Fetching new tweets from {}".format(
                    tw_user.screen_name))
                tweets = bot.tw.user_timeline(screen_name=tw_user.screen_name,
                                              since_id=tw_user.last_tweet_id,
                                              tweet_mode='extended')
            updated_tw_users.append(tw_user)
        except tweepy.errors.TweepyException as e:
            sc = e.response.status_code
            if sc == 429:
                job.logger.debug("- Hit ratelimit, breaking.")
                break

            if sc == 401:
                users_to_cleanup.append((tw_user, 'PROTECTED'))
                job.logger.debug(
                    "- Protected tweets here. Cleaning up this user")
                continue

            if sc == 404:
                users_to_cleanup.append((tw_user, 'NOTFOUND'))
                job.logger.debug(
                    "- 404? Maybe screen name changed? Cleaning up this user")
                continue

            job.logger.debug("- Unknown exception, Status code {}".format(sc))
            continue

        for tweet in tweets:
            job.logger.debug("- Got tweet: {}".format(tweet.full_text))

            # Check if tweet contains media, else check if it contains a link to an image
            extensions = ('.jpg', '.jpeg', '.png', '.gif')
            pattern = '[(%s)]$' % ')('.join(extensions)
            photo_url = []
            video_url = ''
            tweet_text = html.unescape(tweet.full_text)
            if 'media' in tweet.entities:
                for imgs in tweet.extended_entities['media']:
                    photo_url.append(imgs['media_url_https'])
                try:
                    if 'video_info' in tweet.extended_entities['media'][0]:
                        # file = open("video_url.txt", "a")
                        # file.write('\n')
                        # file.write(str(tweet.extended_entities['media']))
                        # file.close()
                        max_bit = 0
                        for video_info in tweet.extended_entities['media'][0][
                                'video_info']['variants']:
                            if 'bitrate' in video_info:
                                if (video_info['bitrate'] > max_bit):
                                    video_url = video_info['url']
                                    max_bit = video_info['bitrate']
                except:
                    job.logger.warning(
                        "{} Finding video failed, video url is in the video_url.txt..."
                        .format(tweet.id))
            else:
                for url_entity in tweet.entities['urls']:
                    expanded_url = url_entity['expanded_url']
                    if re.search(pattern, expanded_url):
                        photo_url.append(expanded_url)
                        break
            if len(photo_url) != 0:
                job.logger.debug("- - Found media URL in tweet: " +
                                 photo_url[0])

            for url_entity in tweet.entities['urls']:
                expanded_url = url_entity['expanded_url']
                indices = url_entity['indices']
                display_url = tweet.full_text[indices[0]:indices[1]]
                tweet_text = tweet_text.replace(display_url, expanded_url)

            tw_data = {
                'tw_id': tweet.id,
                'text': tweet_text,
                'created_at': tweet.created_at,
                'twitter_user': tw_user,
                'photo_url': photo_url,
                'video_url': video_url,
            }
            try:
                t = Tweet.get(Tweet.tw_id == tweet.id)
                job.logger.warning("Got duplicated tw_id on this tweet:")
                job.logger.warning(str(tw_data))
            except Tweet.DoesNotExist:
                tweet_rows.append(tw_data)

            if len(tweet_rows) >= 100:
                Tweet.insert_many(tweet_rows).execute()
                tweet_rows = []

    TwitterUser.update(last_fetched=datetime.now()) \
        .where(TwitterUser.id << [tw.id for tw in updated_tw_users]).execute()

    if not updated_tw_users:
        return

    if tweet_rows:
        Tweet.insert_many(tweet_rows).execute()

    # send the new tweets to subscribers
    subscriptions = list(
        Subscription.select().where(Subscription.tw_user << updated_tw_users))
    for s in subscriptions:
        # are there new tweets? send em all!
        job.logger.debug("Checking subscription {} {}".format(
            s.tg_chat.chat_id, s.tw_user.screen_name))

        if s.last_tweet_id == 0:  # didn't receive any tweet yet
            try:
                tw = s.tw_user.tweets.select() \
                    .order_by(Tweet.tw_id.desc()) \
                    .first()
                if tw is None:
                    job.logger.warning("Something fishy is going on here...")
                else:
                    bot.send_tweet(s.tg_chat, tw, s.sub_kind)
                    # save the latest tweet sent on this subscription
                    s.last_tweet_id = tw.tw_id
                    s.save()
            except IndexError:
                job.logger.debug("- No tweets available yet on {}".format(
                    s.tw_user.screen_name))

            continue

        if s.tw_user.last_tweet_id > s.last_tweet_id:
            job.logger.debug("- Some fresh tweets here!")
            for tw in (s.tw_user.tweets.select().where(
                    Tweet.tw_id > s.last_tweet_id).order_by(
                        Tweet.tw_id.asc())):
                bot.send_tweet(s.tg_chat, tw, s.sub_kind)

            # save the latest tweet sent on this subscription
            s.last_tweet_id = s.tw_user.last_tweet_id
            s.save()
            continue

        job.logger.debug("- No new tweets here.")

    job.logger.debug("Starting tw_user cleanup")
    if not users_to_cleanup:
        job.logger.debug("- Nothing to cleanup")
    else:
        for tw_user, reason in users_to_cleanup:
            job.logger.debug("- Cleaning up subs on user @{}, {}".format(
                tw_user.screen_name, reason))
            message = INFO_CLEANUP[reason].format(tw_user.screen_name)
            subs = list(tw_user.subscriptions)
            for s in subs:
                chat = s.tg_chat
                if chat.delete_soon:
                    job.logger.debug(
                        "- - skipping because of delete_soon chatid={}".format(
                            chat_id))
                    continue
                chat_id = chat.chat_id
                job.logger.debug("- - bye on chatid={}".format(chat_id))
                s.delete_instance()

                try:
                    bot.sendMessage(chat_id=chat_id, text=message)
                except TelegramError as e:
                    job.logger.info(
                        "Couldn't send unsubscription notice of {} to chat {}: {}"
                        .format(tw_user.screen_name, chat_id, e.message))

                    delet_this = None

                    if e.message == 'Bad Request: group chat was migrated to a supergroup chat':
                        delet_this = True

                    if e.message == "Unauthorized":
                        delet_this = True

                    if delet_this:
                        job.logger.info("Marking chat for deletion")
                        chat.delete_soon = True
                        chat.save()

        job.logger.debug("- Cleaning up TwitterUser @{} {}".format(
            tw_user.screen_name, reason))
        tw_user.delete_instance()

        job.logger.debug("- Cleanup finished")

    job.logger.debug("Cleaning up TelegramChats marked for deletion")
    for chat in TelegramChat.select().where(TelegramChat.delete_soon == True):
        chat.delete_instance(recursive=True)
        job.logger.debug("Deleting chat {}".format(chat.chat_id))
    def run(self, bot):
        self.logger.debug("Fetching tweets...")
        tweet_rows = []
        # fetch the tw users' tweets
        tw_users = list((TwitterUser.select()
                         .join(Subscription)
                         .group_by(TwitterUser)
                         .order_by(TwitterUser.last_fetched)))
        updated_tw_users = []
        users_to_cleanup = []

        for tw_user in tw_users:
            try:
                if tw_user.last_tweet_id == 0:
                    # get just the latest tweet
                    self.logger.debug(
                        "Fetching latest tweet by {}".format(tw_user.screen_name))
                    tweets = bot.tw.user_timeline(
                        screen_name=tw_user.screen_name,
                        count=1)
                else:
                    # get the fresh tweets
                    self.logger.debug(
                        "Fetching new tweets from {}".format(tw_user.screen_name))
                    tweets = bot.tw.user_timeline(
                        screen_name=tw_user.screen_name,
                        since_id=tw_user.last_tweet_id)
                updated_tw_users.append(tw_user)
            except tweepy.error.TweepError as e:
                sc = e.response.status_code
                if sc == 429:
                    self.logger.debug("- Hit ratelimit, breaking.")
                    break

                if sc == 401:
                    users_to_cleanup.append((tw_user, 'PROTECTED'))
                    self.logger.debug("- Protected tweets here. Cleaning up this user")
                    continue

                if sc == 404:
                    users_to_cleanup.append((tw_user, 'NOTFOUND'))
                    self.logger.debug("- 404? Maybe screen name changed? Cleaning up this user")
                    continue

                self.logger.debug(
                    "- Unknown exception, Status code {}".format(sc))
                continue

            for tweet in tweets:
                self.logger.debug("- Got tweet: {}".format(tweet.text))

                # Check if tweet contains media, else check if it contains a link to an image
                extensions = ('.jpg', '.jpeg', '.png', '.gif')
                pattern = '[(%s)]$' % ')('.join(extensions)
                photo_url = ''
                tweet_text = html.unescape(tweet.text)
                if 'media' in tweet.entities:
                    photo_url = tweet.entities['media'][0]['media_url_https']
                else:
                    for url_entity in tweet.entities['urls']:
                        expanded_url = url_entity['expanded_url']
                        if re.search(pattern, expanded_url):
                            photo_url = expanded_url
                            break
                if photo_url:
                    self.logger.debug("- - Found media URL in tweet: " + photo_url)

                for url_entity in tweet.entities['urls']:
                    expanded_url = url_entity['expanded_url']
                    indices = url_entity['indices']
                    display_url = tweet.text[indices[0]:indices[1]]
                    tweet_text = tweet_text.replace(display_url, expanded_url)

                tw_data = {
                    'tw_id': tweet.id,
                    'text': tweet_text,
                    'created_at': tweet.created_at,
                    'twitter_user': tw_user,
                    'photo_url': photo_url,
                }
                try:
                    t = Tweet.get(Tweet.tw_id == tweet.id)
                    self.logger.warning("Got duplicated tw_id on this tweet:")
                    self.logger.warning(str(tw_data))
                except Tweet.DoesNotExist:
                    tweet_rows.append(tw_data)

                if len(tweet_rows) >= self.TWEET_BATCH_INSERT_COUNT:
                    Tweet.insert_many(tweet_rows).execute()
                    tweet_rows = []

        TwitterUser.update(last_fetched=datetime.now()) \
            .where(TwitterUser.id << [tw.id for tw in updated_tw_users]).execute()

        if not updated_tw_users:
            return

        if tweet_rows:
            Tweet.insert_many(tweet_rows).execute()

        # send the new tweets to subscribers
        subscriptions = list(Subscription.select()
                             .where(Subscription.tw_user << updated_tw_users))
        for s in subscriptions:
            # are there new tweets? send em all!
            self.logger.debug(
                "Checking subscription {} {}".format(s.tg_chat.chat_id, s.tw_user.screen_name))

            if s.last_tweet_id == 0:  # didn't receive any tweet yet
                try:
                    tw = s.tw_user.tweets.select() \
                        .order_by(Tweet.tw_id.desc()) \
                        .first()
                    if tw is None:
                        self.logger.warning("Something fishy is going on here...")
                    else:
                        bot.send_tweet(s.tg_chat, tw)
                        # save the latest tweet sent on this subscription
                        s.last_tweet_id = tw.tw_id
                        s.save()
                except IndexError:
                    self.logger.debug("- No tweets available yet on {}".format(s.tw_user.screen_name))

                continue

            if s.tw_user.last_tweet_id > s.last_tweet_id:
                self.logger.debug("- Some fresh tweets here!")
                for tw in (s.tw_user.tweets.select()
                                    .where(Tweet.tw_id > s.last_tweet_id)
                                    .order_by(Tweet.tw_id.asc())
                           ):
                    bot.send_tweet(s.tg_chat, tw)

                # save the latest tweet sent on this subscription
                s.last_tweet_id = s.tw_user.last_tweet_id
                s.save()
                continue

            self.logger.debug("- No new tweets here.")


        self.logger.debug("Starting tw_user cleanup")
        if not users_to_cleanup:
            self.logger.debug("- Nothing to cleanup")
        else:
            for tw_user, reason in users_to_cleanup:
                self.logger.debug("- Cleaning up subs on user @{}, {}".format(tw_user.screen_name, reason))
                message = INFO_CLEANUP[reason].format(tw_user.screen_name)
                subs = list(tw_user.subscriptions)
                for s in subs:
                    chat = s.tg_chat
                    if chat.delete_soon:
                        self.logger.debug ("- - skipping because of delete_soon chatid={}".format(chat_id))
                        continue
                    chat_id = chat.chat_id
                    self.logger.debug ("- - bye on chatid={}".format(chat_id))
                    s.delete_instance()

                    try:
                        bot.sendMessage(chat_id=chat_id, text=message)
                    except TelegramError as e:
                        self.logger.info("Couldn't send unsubscription notice of {} to chat {}: {}".format(
                            tw_user.screen_name, chat_id, e.message
                        ))

                        delet_this = None

                        if e.message == 'Bad Request: group chat was migrated to a supergroup chat':
                            delet_this = True

                        if e.message == "Unauthorized":
                            delet_this = True

                        if delet_this:
                            self.logger.info("Marking chat for deletion")
                            chat.delete_soon = True
                            chat.save()

            self.logger.debug("- Cleaning up TwitterUser @{}".format(tw_user.screen_name, reason))
            tw_user.delete_instance()

            self.logger.debug ("- Cleanup finished")

        self.logger.debug("Cleaning up TelegramChats marked for deletion")
        for chat in TelegramChat.select().where(TelegramChat.delete_soon == True):
            chat.delete_instance(recursive=True)
            self.logger.debug("Deleting chat {}".format(chat.chat_id))