예제 #1
0
파일: job.py 프로젝트: AnCh7/twtr2telega
    def run(self, bot):
        """Fetch fresh tweets, forward them to subscribers and clean up.

        The job works in four phases:

        1. Request the timeline of every ``TwitterUser`` that is joined to
           at least one ``Subscription`` and stage the tweets that are not
           stored yet.
        2. Store the staged tweets and send the unsent ones to each
           subscription of the users that were fetched successfully.
        3. Remove the subscriptions (notifying the chats) and the
           ``TwitterUser`` row of every account that answered 401 or 404.
        4. Delete every ``TelegramChat`` flagged with ``delete_soon``.

        Args:
            bot: object providing ``tw.user_timeline``, ``send_tweet`` and
                ``sendMessage``, as used below.
        """
        self.logger.debug("Fetching tweets...")
        # A plain link counts as a photo when it ends with one of these.
        image_extensions = ('.jpg', '.jpeg', '.png', '.gif')
        tweet_rows = []
        tw_users = list((TwitterUser.select()
                         .join(Subscription)
                         .group_by(TwitterUser)
                         .order_by(TwitterUser.last_fetched)))
        updated_tw_users = []
        users_to_cleanup = []

        for tw_user in tw_users:
            try:
                if tw_user.last_tweet_id == 0:
                    # Nothing recorded for this user: request a single tweet
                    self.logger.debug("Fetching latest tweet by {}".format(tw_user.screen_name))
                    tweets = bot.tw.user_timeline(screen_name=tw_user.screen_name,
                                                  count=1,
                                                  tweet_mode='extended',
                                                  include_rts=False,
                                                  exclude_replies=True)
                else:
                    # Request everything newer than the last known tweet
                    self.logger.debug("Fetching new tweets from {}".format(tw_user.screen_name))
                    tweets = bot.tw.user_timeline(screen_name=tw_user.screen_name,
                                                  since_id=tw_user.last_tweet_id,
                                                  tweet_mode='extended',
                                                  include_rts=False,
                                                  exclude_replies=True)
                updated_tw_users.append(tw_user)
            except tweepy.error.TweepError as e:
                # The error may carry no HTTP response at all (e.g. the
                # request itself failed), so read the status code defensively.
                response = getattr(e, 'response', None)
                sc = getattr(response, 'status_code', None)
                if sc == 429:
                    self.logger.debug("- Hit rate limit, breaking.")
                    break
                if sc == 401:
                    users_to_cleanup.append((tw_user, 'PROTECTED'))
                    self.logger.debug("- Protected tweets here. Cleaning up this user")
                    continue
                if sc == 404:
                    users_to_cleanup.append((tw_user, 'NOTFOUND'))
                    self.logger.debug("- 404? Maybe screen name changed? Cleaning up this user")
                    continue
                self.logger.debug("- Unknown exception, Status code {}".format(sc))
                continue

            for tweet in tweets:
                self.logger.debug("- Got tweet: {}".format(tweet.full_text))

                # Check if tweet contains media, else check if it contains a link to an image
                photo_url = ''
                tweet_text = html.unescape(tweet.full_text)
                if 'media' in tweet.entities:
                    photo_url = tweet.entities['media'][0]['media_url_https']
                else:
                    for url_entity in tweet.entities['urls']:
                        expanded_url = url_entity['expanded_url']
                        # Real suffix test; the previous regex was a character
                        # class and matched almost any URL.
                        if expanded_url.lower().endswith(image_extensions):
                            photo_url = expanded_url
                            break
                if photo_url:
                    self.logger.debug("- - Found media URL in tweet: " + photo_url)

                # Swap every shortened link in the text for its expanded form
                for url_entity in tweet.entities['urls']:
                    expanded_url = url_entity['expanded_url']
                    indices = url_entity['indices']
                    display_url = tweet.full_text[indices[0]:indices[1]]
                    tweet_text = tweet_text.replace(display_url, expanded_url)

                tw_data = {
                    'tw_id': tweet.id,
                    'text': tweet_text,
                    'created_at': tweet.created_at,
                    'twitter_user': tw_user,
                    'photo_url': photo_url,
                }

                try:
                    Tweet.get(Tweet.tw_id == tweet.id)
                except Tweet.DoesNotExist:
                    tweet_rows.append(tw_data)
                else:
                    self.logger.warning("Got duplicated tw_id on this tweet:")
                    self.logger.warning(str(tw_data))

                # Flush in batches so the pending list stays bounded
                if len(tweet_rows) >= self.TWEET_BATCH_INSERT_COUNT:
                    Tweet.insert_many(tweet_rows).execute()
                    tweet_rows = []

        # Only touch the database / subscribers when at least one user was
        # fetched, but do NOT return early: the cleanup phases below must
        # still run when every fetch failed.
        if updated_tw_users:
            TwitterUser \
                .update(last_fetched=datetime.now()) \
                .where(TwitterUser.id << [tw.id for tw in updated_tw_users]) \
                .execute()

            if tweet_rows:
                Tweet.insert_many(tweet_rows).execute()

            # send new tweets to subscribers
            subscriptions = list(Subscription.select().where(Subscription.tw_user << updated_tw_users))
            for subscription in subscriptions:
                self.logger.debug("Checking subscription {} {}".format(subscription.tg_chat.chat_id, subscription.tw_user.screen_name))

                if subscription.last_tweet_id == 0:  # didn't receive any tweet yet
                    try:
                        tw = subscription.tw_user.tweets.select() \
                            .order_by(Tweet.tw_id.desc()) \
                            .first()
                        if tw is None:
                            self.logger.warning("Something fishy is going on here...")
                        else:
                            bot.send_tweet(subscription.tg_chat, tw)
                            # save the latest tweet sent on this subscription
                            subscription.last_tweet_id = tw.tw_id
                            subscription.save()
                    except IndexError:
                        self.logger.debug("- No tweets available yet on {}".format(subscription.tw_user.screen_name))

                    continue

                if subscription.tw_user.last_tweet_id > subscription.last_tweet_id:
                    self.logger.debug("- Some fresh tweets here!")
                    fresh_tweets = (subscription.tw_user.tweets.select()
                                    .where(Tweet.tw_id > subscription.last_tweet_id)
                                    .order_by(Tweet.tw_id.asc()))
                    for tw in fresh_tweets:
                        bot.send_tweet(subscription.tg_chat, tw)

                    # save the latest tweet sent on this subscription
                    subscription.last_tweet_id = subscription.tw_user.last_tweet_id
                    subscription.save()
                    continue

                self.logger.debug("- No new tweets here.")

        self.logger.debug("Starting tw_user cleanup")
        if not users_to_cleanup:
            self.logger.debug("- Nothing to cleanup")
        else:
            for tw_user, reason in users_to_cleanup:
                self.logger.debug("- Cleaning up subs on user @{}, {}".format(tw_user.screen_name, reason))
                message = INFO_CLEANUP[reason].format(tw_user.screen_name)
                subs = list(tw_user.subscriptions)
                for subscription in subs:
                    chat = subscription.tg_chat
                    # Read the id first: the skip branch below logs it too
                    chat_id = chat.chat_id
                    if chat.delete_soon:
                        self.logger.debug("- - skipping because of delete_soon chat id={}".format(chat_id))
                        continue
                    self.logger.debug("- - bye on chat id={}".format(chat_id))
                    subscription.delete_instance()

                    try:
                        bot.sendMessage(chat_id=chat_id, text=message)
                    except TelegramError as e:
                        self.logger.info("Couldn't send unsubscription notice of {} to chat {}: {}"
                                         .format(tw_user.screen_name, chat_id, e.message))

                        # These two errors mean the chat can no longer be
                        # reached, so flag it for the chat cleanup below.
                        if e.message in ('Bad Request: group chat was migrated to a supergroup chat',
                                         "Unauthorized"):
                            self.logger.info("Marking chat for deletion")
                            chat.delete_soon = True
                            chat.save()

                # Delete every cleaned-up user, not just the last one
                self.logger.debug("- Cleaning up TwitterUser @{}".format(tw_user.screen_name))
                tw_user.delete_instance()

            self.logger.debug("- Cleanup finished")

        self.logger.debug("Cleaning up TelegramChats marked for deletion")
        for chat in TelegramChat.select().where(TelegramChat.delete_soon == True):
            chat.delete_instance(recursive=True)
            self.logger.debug("Deleting chat {}".format(chat.chat_id))
예제 #2
0
def FetchAndSendTweetsJob(context_in: CallbackContext) -> None:
    """Fetch fresh tweets, forward them to subscribers and clean up.

    The callback first configures the job object it receives (name, logger,
    repeat flag and the two ``Event`` attributes) and then works in four
    phases:

    1. Request the timeline of every ``TwitterUser`` that is joined to at
       least one ``Subscription`` and stage the tweets that are not stored
       yet, together with their photo and video URLs.
    2. Store the staged tweets and send the unsent ones to each
       subscription of the users that were fetched successfully.
    3. Remove the subscriptions (notifying the chats) and the
       ``TwitterUser`` row of every account that answered 401 or 404.
    4. Delete every ``TelegramChat`` flagged with ``delete_soon``.

    Args:
        context_in: callback context; ``context_in.job`` and
            ``context_in.bot`` are the only attributes read here.
    """
    job = context_in.job
    bot = context_in.bot
    job.repeat = True
    job.context = None
    job.name = "FetchAndSendTweetsJob"
    job._remove = Event()
    job._enabled = Event()
    job._enabled.set()
    job.logger = logging.getLogger(job.name)
    job.logger.debug("Fetching tweets...")
    # A plain link counts as a photo when it ends with one of these.
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif')
    tweet_rows = []
    # fetch the tw users' tweets
    tw_users = list((
        TwitterUser.select().join(Subscription).group_by(TwitterUser).order_by(
            TwitterUser.last_fetched)))
    updated_tw_users = []
    users_to_cleanup = []

    for tw_user in tw_users:
        try:
            if tw_user.last_tweet_id == 0:
                # get just the latest tweet
                job.logger.debug("Fetching latest tweet by {}".format(
                    tw_user.screen_name))
                tweets = bot.tw.user_timeline(screen_name=tw_user.screen_name,
                                              count=1,
                                              tweet_mode='extended')
            else:
                # get the fresh tweets
                job.logger.debug("Fetching new tweets from {}".format(
                    tw_user.screen_name))
                tweets = bot.tw.user_timeline(screen_name=tw_user.screen_name,
                                              since_id=tw_user.last_tweet_id,
                                              tweet_mode='extended')
            updated_tw_users.append(tw_user)
        except tweepy.errors.TweepyException as e:
            # Not every TweepyException carries an HTTP response, so read
            # the status code defensively instead of raising AttributeError.
            response = getattr(e, 'response', None)
            sc = getattr(response, 'status_code', None)
            if sc == 429:
                job.logger.debug("- Hit ratelimit, breaking.")
                break

            if sc == 401:
                users_to_cleanup.append((tw_user, 'PROTECTED'))
                job.logger.debug(
                    "- Protected tweets here. Cleaning up this user")
                continue

            if sc == 404:
                users_to_cleanup.append((tw_user, 'NOTFOUND'))
                job.logger.debug(
                    "- 404? Maybe screen name changed? Cleaning up this user")
                continue

            job.logger.debug("- Unknown exception, Status code {}".format(sc))
            continue

        for tweet in tweets:
            job.logger.debug("- Got tweet: {}".format(tweet.full_text))

            # Check if tweet contains media, else check if it contains a link to an image
            photo_url = []
            video_url = ''
            tweet_text = html.unescape(tweet.full_text)
            if 'media' in tweet.entities:
                media = tweet.extended_entities['media']
                for item in media:
                    photo_url.append(item['media_url_https'])
                try:
                    if 'video_info' in media[0]:
                        # Keep the variant with the highest positive bitrate
                        max_bit = 0
                        for variant in media[0]['video_info']['variants']:
                            if 'bitrate' in variant and variant['bitrate'] > max_bit:
                                video_url = variant['url']
                                max_bit = variant['bitrate']
                except Exception:
                    # Best effort: a malformed media entry must not abort
                    # the whole job, but the reason is kept in the log.
                    job.logger.warning(
                        "{} Finding video failed".format(tweet.id),
                        exc_info=True)
            else:
                for url_entity in tweet.entities['urls']:
                    expanded_url = url_entity['expanded_url']
                    # Real suffix test; the previous regex was a character
                    # class and matched almost any URL.
                    if expanded_url.lower().endswith(image_extensions):
                        photo_url.append(expanded_url)
                        break
            if photo_url:
                job.logger.debug("- - Found media URL in tweet: " +
                                 photo_url[0])

            # Swap every shortened link in the text for its expanded form
            for url_entity in tweet.entities['urls']:
                expanded_url = url_entity['expanded_url']
                indices = url_entity['indices']
                display_url = tweet.full_text[indices[0]:indices[1]]
                tweet_text = tweet_text.replace(display_url, expanded_url)

            tw_data = {
                'tw_id': tweet.id,
                'text': tweet_text,
                'created_at': tweet.created_at,
                'twitter_user': tw_user,
                'photo_url': photo_url,
                'video_url': video_url,
            }
            try:
                Tweet.get(Tweet.tw_id == tweet.id)
            except Tweet.DoesNotExist:
                tweet_rows.append(tw_data)
            else:
                job.logger.warning("Got duplicated tw_id on this tweet:")
                job.logger.warning(str(tw_data))

            # Flush in batches so the pending list stays bounded
            if len(tweet_rows) >= 100:
                Tweet.insert_many(tweet_rows).execute()
                tweet_rows = []

    # Only touch the database / subscribers when at least one user was
    # fetched, but do NOT return early: the cleanup phases below must still
    # run when every fetch failed.
    if updated_tw_users:
        TwitterUser.update(last_fetched=datetime.now()) \
            .where(TwitterUser.id << [tw.id for tw in updated_tw_users]).execute()

        if tweet_rows:
            Tweet.insert_many(tweet_rows).execute()

        # send the new tweets to subscribers
        subscriptions = list(
            Subscription.select().where(Subscription.tw_user << updated_tw_users))
        for s in subscriptions:
            # are there new tweets? send em all!
            job.logger.debug("Checking subscription {} {}".format(
                s.tg_chat.chat_id, s.tw_user.screen_name))

            if s.last_tweet_id == 0:  # didn't receive any tweet yet
                try:
                    tw = s.tw_user.tweets.select() \
                        .order_by(Tweet.tw_id.desc()) \
                        .first()
                    if tw is None:
                        job.logger.warning("Something fishy is going on here...")
                    else:
                        bot.send_tweet(s.tg_chat, tw, s.sub_kind)
                        # save the latest tweet sent on this subscription
                        s.last_tweet_id = tw.tw_id
                        s.save()
                except IndexError:
                    job.logger.debug("- No tweets available yet on {}".format(
                        s.tw_user.screen_name))

                continue

            if s.tw_user.last_tweet_id > s.last_tweet_id:
                job.logger.debug("- Some fresh tweets here!")
                fresh_tweets = (s.tw_user.tweets.select()
                                .where(Tweet.tw_id > s.last_tweet_id)
                                .order_by(Tweet.tw_id.asc()))
                for tw in fresh_tweets:
                    bot.send_tweet(s.tg_chat, tw, s.sub_kind)

                # save the latest tweet sent on this subscription
                s.last_tweet_id = s.tw_user.last_tweet_id
                s.save()
                continue

            job.logger.debug("- No new tweets here.")

    job.logger.debug("Starting tw_user cleanup")
    if not users_to_cleanup:
        job.logger.debug("- Nothing to cleanup")
    else:
        for tw_user, reason in users_to_cleanup:
            job.logger.debug("- Cleaning up subs on user @{}, {}".format(
                tw_user.screen_name, reason))
            message = INFO_CLEANUP[reason].format(tw_user.screen_name)
            subs = list(tw_user.subscriptions)
            for s in subs:
                chat = s.tg_chat
                # Read the id first: the skip branch below logs it too
                chat_id = chat.chat_id
                if chat.delete_soon:
                    job.logger.debug(
                        "- - skipping because of delete_soon chatid={}".format(
                            chat_id))
                    continue
                job.logger.debug("- - bye on chatid={}".format(chat_id))
                s.delete_instance()

                try:
                    bot.sendMessage(chat_id=chat_id, text=message)
                except TelegramError as e:
                    job.logger.info(
                        "Couldn't send unsubscription notice of {} to chat {}: {}"
                        .format(tw_user.screen_name, chat_id, e.message))

                    # These two errors mean the chat can no longer be
                    # reached, so flag it for the chat cleanup below.
                    if e.message in (
                            'Bad Request: group chat was migrated to a supergroup chat',
                            "Unauthorized"):
                        job.logger.info("Marking chat for deletion")
                        chat.delete_soon = True
                        chat.save()

            # Delete every cleaned-up user, not just the last one
            job.logger.debug("- Cleaning up TwitterUser @{} {}".format(
                tw_user.screen_name, reason))
            tw_user.delete_instance()

        job.logger.debug("- Cleanup finished")

    job.logger.debug("Cleaning up TelegramChats marked for deletion")
    for chat in TelegramChat.select().where(TelegramChat.delete_soon == True):
        chat.delete_instance(recursive=True)
        job.logger.debug("Deleting chat {}".format(chat.chat_id))
예제 #3
0
	def run(self, bot):
		self.logger.debug('Fetching tweets...')
		tweet_rows = []
		# fetch the tw users' tweets
		tw_users = list((TwitterUser.select()
						.join(Subscription)
						.group_by(TwitterUser)
						.order_by(TwitterUser.last_fetched)))
		updated_tw_users = []
		users_to_cleanup = []

		for tw_user in tw_users:
			try:
				if tw_user.last_tweet_id == -1:
					# get just the latest tweet
					self.logger.debug(
						'Fetching latest tweet by {}'.format(tw_user.screen_name))
					tweets = bot.tw.user_timeline(
						screen_name=tw_user.screen_name,
						count=1,
						tweet_mode='extended')
				else:
					# get the fresh tweets
					self.logger.debug(
						'Fetching new tweets from {}'.format(tw_user.screen_name))
					tweets = bot.tw.user_timeline(
						screen_name=tw_user.screen_name,
						since_id=tw_user.last_tweet_id,
						tweet_mode='extended')
				updated_tw_users.append(tw_user)
			except tweepy.error.TweepError as e:
				sc = e.response.status_code
				if sc == 429:
					self.logger.debug('- Hit ratelimit, breaking.')
					break

				if sc == 401:
					users_to_cleanup.append((tw_user, 'PROTECTED'))
					self.logger.debug('- Protected tweets here. Cleaning up this user')
					continue

				if sc == 404:
					users_to_cleanup.append((tw_user, 'NOTFOUND'))
					self.logger.debug('- 404? Maybe screen name changed? Cleaning up this user')
					continue

				self.logger.debug(
					'- Unknown exception, Status code {}'.format(sc))
				continue

			for tweet in tweets:
				retweet = False
				if 'retweeted_status' in tweet._json:
					retweet = True

				# NOTE: tweet.full_text doesn't work for retweets,
				# see https://stackoverflow.com/a/48967803

				# use current tweet by default
				tweet_data = tweet
				if retweet:
					# use original tweet to text processing
					tweet_data = tweet.retweeted_status

				tw_text = tweet_data.full_text

				self.logger.debug('- Got tweet: {}'.format(tw_text))

				# Check if tweet contains media, else check if it contains a link to an image
				extensions = ('.jpg', '.jpeg', '.png', '.gif')
				pattern = '[(%s)]$' % ')('.join(extensions)
				photo_url = ''
				tweet_text = html.unescape(tw_text)
				if 'media' in tweet_data.entities:
					photo_url = tweet_data.entities['media'][0]['media_url_https']
				else:
					for url_entity in tweet_data.entities['urls']:
						expanded_url = url_entity['expanded_url']
						if re.search(pattern, expanded_url):
							photo_url = expanded_url
							break
				if photo_url:
					self.logger.debug('- - Found media URL in tweet: ' + photo_url)

				for url_entity in tweet_data.entities['urls']:
					expanded_url = url_entity['expanded_url']
					parsed_url = urlparse(expanded_url)

					indices = url_entity['indices']
					display_url = tw_text[indices[0]:indices[1]]

					replace_text = expanded_url

					if parsed_url.netloc == 'twitter.com':
						re_pattern = '/(?P<username>.+)/status/(?P<twit_id>[0-9]+)'
						re_result = re.match(re_pattern, parsed_url.path)
						if re_result != None:
							try:
								commented_tweet = bot.tw.get_status(re_result.group('twit_id'), tweet_mode='extended')
							except:
								pass
							else:
								# TODO: implement hack for retweets
								# TODO: move text formatting into send_tweet, store only basic tweet bits
								tweet_text = 'comment:\n' + tweet_text
								replace_text = '\n\noriginal tweet:\n«{}»'.format(commented_tweet.full_text)

					tweet_text = tweet_text.replace(display_url, replace_text)

				tw_data = {
					'tw_id': tweet.id,
					'text': tweet_text,
					'created_at': tweet.created_at,
					'twitter_user': tw_user,
					'photo_url': photo_url,
				}

				if retweet:
					# store original screen name
					tw_data['original_name'] = tweet_data.user.screen_name

				try:
					t = Tweet.get(Tweet.tw_id == tweet.id)
					self.logger.warning('Got duplicated tw_id on this tweet:')
					self.logger.warning(str(tw_data))
				except Tweet.DoesNotExist:
					tweet_rows.append(tw_data)

				if len(tweet_rows) >= self.TWEET_BATCH_INSERT_COUNT:
					Tweet.insert_many(tweet_rows).execute()
					tweet_rows = []

		TwitterUser.update(last_fetched=datetime.now()) \
			.where(TwitterUser.id << [tw.id for tw in updated_tw_users]).execute()

		if updated_tw_users and tweet_rows:
			Tweet.insert_many(tweet_rows).execute()

		# send last tweets to newcomers
		subscriptions = list(Subscription.select()
							.where(Subscription.last_tweet_id == -1))
		for s in subscriptions:
			self.logger.debug(
				'Checking new subscription {} {}'.format(s.tg_chat.chat_id, s.tw_user.screen_name))

			try:
				tw = s.tw_user.tweets.select() \
					.order_by(Tweet.tw_id.desc()) \
					.first()
				if tw is None:
					s.last_tweet_id = 0
					s.save()
					self.logger.warning('Something fishy is going on here...')
				else:
					bot.send_tweet(s.tg_chat, tw)
					# save the latest tweet sent on this subscription
					s.last_tweet_id = tw.tw_id
					s.save()
			except IndexError:
				self.logger.debug('- No tweets available yet on {}'.format(s.tw_user.screen_name))

		# send the new tweets to existing subscribers
		query = '''SELECT S.* FROM subscription S
		INNER JOIN twitteruser TU
		ON S.tw_user_id = TU.id
		WHERE S.last_tweet_id <
		(
			SELECT tw_id
			FROM tweet T
			WHERE T.twitter_user_id = TU.id
			ORDER BY T.tw_id DESC
			LIMIT 1
		)
'''
		subscriptions = list(Subscription.raw(query))
		for s in subscriptions:
			# are there new tweets? send em all!
			self.logger.debug(
				'Checking subscription {} {}'.format(s.tg_chat.chat_id, s.tw_user.screen_name))

			self.logger.debug('- Some fresh tweets here which was not sended yet!')

			last_sended_tweet_id = s.last_tweet_id

			for tw in (s.tw_user.tweets.select()
								.where(Tweet.tw_id > s.last_tweet_id)
								.order_by(Tweet.tw_id.asc())
					):
				if bot.send_tweet(s.tg_chat, tw):
					last_sended_tweet_id = tw.tw_id
				else:
					break

			# save the latest tweet sent on this subscription
			s.last_tweet_id = last_sended_tweet_id
			s.save()
			continue

			self.logger.debug('- No new tweets here.')


		self.logger.debug('Starting tw_user cleanup')
		if not users_to_cleanup:
			self.logger.debug('- Nothing to cleanup')
		else:
			for tw_user, reason in users_to_cleanup:
				self.logger.debug('- Cleaning up subs on user @{}, {}'.format(tw_user.screen_name, reason))
				message = INFO_CLEANUP[reason].format(tw_user.screen_name)
				subs = list(tw_user.subscriptions)
				for s in subs:
					chat = s.tg_chat
					if chat.delete_soon:
						self.logger.debug ('- - skipping because of delete_soon chatid={}'.format(chat_id))
						continue
					chat_id = chat.chat_id
					self.logger.debug ('- - bye on chatid={}'.format(chat_id))
					s.delete_instance()

					try:
						bot.sendMessage(chat_id=chat_id, text=message)
					except TelegramError as e:
						self.logger.info('Couldn\'t send unsubscription notice of {} to chat {}: {}'.format(
							tw_user.screen_name, chat_id, e.message
						))

						delet_this = None

						if e.message == 'Bad Request: group chat was migrated to a supergroup chat':
							delet_this = True

						if e.message == 'Unauthorized':
							delet_this = True

						if delet_this:
							self.logger.info('Marking chat for deletion')
							chat.delete_soon = True
							chat.save()

			self.logger.debug('- Cleaning up TwitterUser @{}'.format(tw_user.screen_name, reason))
			tw_user.delete_instance()

			self.logger.debug ('- User cleanup finished')

		self.logger.debug('Cleaning up TelegramChats marked for deletion')
		for chat in TelegramChat.select().where(TelegramChat.delete_soon == True):
			chat.delete_instance(recursive=True)
			self.logger.debug('Deleting chat {}'.format(chat.chat_id))
		self.logger.debug('TelegramChats cleanup finished')
예제 #4
0
    def run(self, bot):
        """Fetch fresh tweets, forward them to subscribers and clean up.

        The job works in four phases:

        1. Request the timeline of every ``TwitterUser`` that is joined to
           at least one ``Subscription`` and stage the tweets that are not
           stored yet.
        2. Store the staged tweets and send the unsent ones to each
           subscription of the users that were fetched successfully.
        3. Remove the subscriptions (notifying the chats) and the
           ``TwitterUser`` row of every account that answered 401 or 404.
        4. Delete every ``TelegramChat`` flagged with ``delete_soon``.

        Args:
            bot: object providing ``tw.user_timeline``, ``send_tweet`` and
                ``sendMessage``, as used below.
        """
        self.logger.debug("Fetching tweets...")
        # A plain link counts as a photo when it ends with one of these.
        image_extensions = ('.jpg', '.jpeg', '.png', '.gif')
        tweet_rows = []
        # fetch the tw users' tweets
        tw_users = list((TwitterUser.select()
                         .join(Subscription)
                         .group_by(TwitterUser)
                         .order_by(TwitterUser.last_fetched)))
        updated_tw_users = []
        users_to_cleanup = []

        for tw_user in tw_users:
            try:
                if tw_user.last_tweet_id == 0:
                    # get just the latest tweet
                    self.logger.debug(
                        "Fetching latest tweet by {}".format(tw_user.screen_name))
                    tweets = bot.tw.user_timeline(
                        screen_name=tw_user.screen_name,
                        count=1)
                else:
                    # get the fresh tweets
                    self.logger.debug(
                        "Fetching new tweets from {}".format(tw_user.screen_name))
                    tweets = bot.tw.user_timeline(
                        screen_name=tw_user.screen_name,
                        since_id=tw_user.last_tweet_id)
                updated_tw_users.append(tw_user)
            except tweepy.error.TweepError as e:
                # The error may carry no HTTP response at all (e.g. the
                # request itself failed), so read the status code defensively.
                response = getattr(e, 'response', None)
                sc = getattr(response, 'status_code', None)
                if sc == 429:
                    self.logger.debug("- Hit ratelimit, breaking.")
                    break

                if sc == 401:
                    users_to_cleanup.append((tw_user, 'PROTECTED'))
                    self.logger.debug("- Protected tweets here. Cleaning up this user")
                    continue

                if sc == 404:
                    users_to_cleanup.append((tw_user, 'NOTFOUND'))
                    self.logger.debug("- 404? Maybe screen name changed? Cleaning up this user")
                    continue

                self.logger.debug(
                    "- Unknown exception, Status code {}".format(sc))
                continue

            for tweet in tweets:
                self.logger.debug("- Got tweet: {}".format(tweet.text))

                # Check if tweet contains media, else check if it contains a link to an image
                photo_url = ''
                tweet_text = html.unescape(tweet.text)
                if 'media' in tweet.entities:
                    photo_url = tweet.entities['media'][0]['media_url_https']
                else:
                    for url_entity in tweet.entities['urls']:
                        expanded_url = url_entity['expanded_url']
                        # Real suffix test; the previous regex was a character
                        # class and matched almost any URL.
                        if expanded_url.lower().endswith(image_extensions):
                            photo_url = expanded_url
                            break
                if photo_url:
                    self.logger.debug("- - Found media URL in tweet: " + photo_url)

                # Swap every shortened link in the text for its expanded form
                for url_entity in tweet.entities['urls']:
                    expanded_url = url_entity['expanded_url']
                    indices = url_entity['indices']
                    display_url = tweet.text[indices[0]:indices[1]]
                    tweet_text = tweet_text.replace(display_url, expanded_url)

                tw_data = {
                    'tw_id': tweet.id,
                    'text': tweet_text,
                    'created_at': tweet.created_at,
                    'twitter_user': tw_user,
                    'photo_url': photo_url,
                }
                try:
                    Tweet.get(Tweet.tw_id == tweet.id)
                except Tweet.DoesNotExist:
                    tweet_rows.append(tw_data)
                else:
                    self.logger.warning("Got duplicated tw_id on this tweet:")
                    self.logger.warning(str(tw_data))

                # Flush in batches so the pending list stays bounded
                if len(tweet_rows) >= self.TWEET_BATCH_INSERT_COUNT:
                    Tweet.insert_many(tweet_rows).execute()
                    tweet_rows = []

        # Only touch the database / subscribers when at least one user was
        # fetched, but do NOT return early: the cleanup phases below must
        # still run when every fetch failed.
        if updated_tw_users:
            TwitterUser.update(last_fetched=datetime.now()) \
                .where(TwitterUser.id << [tw.id for tw in updated_tw_users]).execute()

            if tweet_rows:
                Tweet.insert_many(tweet_rows).execute()

            # send the new tweets to subscribers
            subscriptions = list(Subscription.select()
                                 .where(Subscription.tw_user << updated_tw_users))
            for s in subscriptions:
                # are there new tweets? send em all!
                self.logger.debug(
                    "Checking subscription {} {}".format(s.tg_chat.chat_id, s.tw_user.screen_name))

                if s.last_tweet_id == 0:  # didn't receive any tweet yet
                    try:
                        tw = s.tw_user.tweets.select() \
                            .order_by(Tweet.tw_id.desc()) \
                            .first()
                        if tw is None:
                            self.logger.warning("Something fishy is going on here...")
                        else:
                            bot.send_tweet(s.tg_chat, tw)
                            # save the latest tweet sent on this subscription
                            s.last_tweet_id = tw.tw_id
                            s.save()
                    except IndexError:
                        self.logger.debug("- No tweets available yet on {}".format(s.tw_user.screen_name))

                    continue

                if s.tw_user.last_tweet_id > s.last_tweet_id:
                    self.logger.debug("- Some fresh tweets here!")
                    fresh_tweets = (s.tw_user.tweets.select()
                                    .where(Tweet.tw_id > s.last_tweet_id)
                                    .order_by(Tweet.tw_id.asc()))
                    for tw in fresh_tweets:
                        bot.send_tweet(s.tg_chat, tw)

                    # save the latest tweet sent on this subscription
                    s.last_tweet_id = s.tw_user.last_tweet_id
                    s.save()
                    continue

                self.logger.debug("- No new tweets here.")

        self.logger.debug("Starting tw_user cleanup")
        if not users_to_cleanup:
            self.logger.debug("- Nothing to cleanup")
        else:
            for tw_user, reason in users_to_cleanup:
                self.logger.debug("- Cleaning up subs on user @{}, {}".format(tw_user.screen_name, reason))
                message = INFO_CLEANUP[reason].format(tw_user.screen_name)
                subs = list(tw_user.subscriptions)
                for s in subs:
                    chat = s.tg_chat
                    # Read the id first: the skip branch below logs it too
                    chat_id = chat.chat_id
                    if chat.delete_soon:
                        self.logger.debug("- - skipping because of delete_soon chatid={}".format(chat_id))
                        continue
                    self.logger.debug("- - bye on chatid={}".format(chat_id))
                    s.delete_instance()

                    try:
                        bot.sendMessage(chat_id=chat_id, text=message)
                    except TelegramError as e:
                        self.logger.info("Couldn't send unsubscription notice of {} to chat {}: {}".format(
                            tw_user.screen_name, chat_id, e.message
                        ))

                        # These two errors mean the chat can no longer be
                        # reached, so flag it for the chat cleanup below.
                        if e.message in (
                                'Bad Request: group chat was migrated to a supergroup chat',
                                "Unauthorized"):
                            self.logger.info("Marking chat for deletion")
                            chat.delete_soon = True
                            chat.save()

                # Delete every cleaned-up user, not just the last one
                self.logger.debug("- Cleaning up TwitterUser @{}".format(tw_user.screen_name))
                tw_user.delete_instance()

            self.logger.debug("- Cleanup finished")

        self.logger.debug("Cleaning up TelegramChats marked for deletion")
        for chat in TelegramChat.select().where(TelegramChat.delete_soon == True):
            chat.delete_instance(recursive=True)
            self.logger.debug("Deleting chat {}".format(chat.chat_id))