def getTweets():
    """
    Get list of tweets, with tweet ID and content, from the configured
    Twitter account URL.

    Uses BeautifulSoup to extract the tweet IDs and text of all tweets
    on the specified page.

    Returns:
        list[dict] -- one {"id": ..., "text": ...} entry per tweet,
        None if the page contained no usable tweets, or False on
        configuration / retrieval errors.
    """
    all_tweets = []
    url = helpers._config('tweets.source_account_url')

    if not url:
        # str() guards against url being None inside the message
        # (the old concatenation raised TypeError in that case).
        helpers._error('getTweets() => The source Twitter account URL (' +
                       str(url) +
                       ') was incorrect. Could not retrieve tweets.')
        return False

    headers = {}
    headers['accept-language'] = 'en-US,en;q=0.9'
    headers['dnt'] = '1'
    headers['user-agent'] = helpers._config('gen.APP_NAME')

    # Bug fix: the headers were previously built but never sent.
    data = requests.get(url, headers=headers)
    html = BeautifulSoup(data.text, 'html.parser')
    timeline = html.select('#timeline li.stream-item')

    # select() returns a list, never None — check for emptiness instead
    # (the old `is None` test could never fire).
    if not timeline:
        helpers._error(
            'getTweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ('
            + url + ') is correct.')
        return False

    helpers._info('getTweets() => Fetched tweets for ' + url + '.')

    for tweet in timeline:
        tweet_id = tweet['data-item-id']

        try:
            tweet_text = tweet.select('p.tweet-text')[0].get_text()
        except Exception:
            # No tweet text node in this stream item; skip it.
            helpers._info('getTweets() => No tweet text found. Moving on...')
            continue

        all_tweets.append({"id": tweet_id, "text": tweet_text})

    return all_tweets if len(all_tweets) > 0 else None
def get_tweets():
    """
    Get list of tweets, with tweet ID, content and timestamp, from the
    configured Twitter account URL.

    Uses BeautifulSoup to extract the tweet IDs, text and timestamps of
    all tweets on the specified page.

    Returns:
        list[dict] -- {"id", "text", "time"} per tweet, None if nothing
        usable was found, or False on configuration / retrieval errors.
    """
    all_tweets = []
    url = helpers._config("TT_SOURCE_TWITTER_URL")

    if not url:
        logger.error(
            "get_tweets() => The source Twitter account URL ({}) was incorrect. Could not retrieve tweets."
            .format(url))
        return False

    headers = {}
    headers["accept-language"] = "en-US,en;q=0.9"
    headers["dnt"] = "1"
    headers["user-agent"] = helpers._config("TT_APP_NAME")

    # Bug fix: the headers were previously built but never sent.
    data = requests.get(url, headers=headers)
    html = BeautifulSoup(data.text, "html.parser")
    timeline = html.select("#timeline li.stream-item")

    # select() returns a list, never None — check for emptiness instead
    # (the old `is None` test could never fire).
    if not timeline:
        logger.error(
            "get_tweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ({}) is correct."
            .format(url))
        return False

    logger.info("get_tweets() => Fetched tweets for {}.".format(url))

    for tweet in timeline:
        try:
            tweet_id = tweet["data-item-id"]
            # NOTE(review): encode() stores bytes, not str, in "text" —
            # downstream consumers appear to rely on this; confirm.
            tweet_text = tweet.select("p.tweet-text")[0].get_text().encode(
                "utf-8")
            tweet_time = int(
                tweet.select("span._timestamp")[0].attrs["data-time-ms"])
            all_tweets.append({
                "id": tweet_id,
                "text": tweet_text,
                "time": tweet_time
            })
        except Exception as e:
            # Malformed stream item (missing text/timestamp): skip it.
            logger.error("get_tweets() => No tweet text found.")
            logger.error(e)
            continue

    return all_tweets if len(all_tweets) > 0 else None
def runJob(tweet_url):
    """Build a TweetToot job for `tweet_url` and relay its tweets to Mastodon.

    Always returns True; job failures are logged, not raised.
    """
    config = "config.json"

    # Assemble the constructor arguments straight from the config file.
    params = {
        "app_name": helpers._config("TT_APP_NAME", config),
        "twitter_url": tweet_url,
        "mastodon_url": helpers._config("TT_HOST_INSTANCE", config),
        "mastodon_token": helpers._config("TT_APP_SECURE_TOKEN", config),
        "twitter_api_key": helpers._config("TT_TWITTER_CONSUMER_KEY", config),
        "twitter_api_secret": helpers._config("TT_TWITTER_CONSUMER_SECRET",
                                              config),
        "twitter_user_key": helpers._config("TT_TWITTER_TOKEN", config),
        "twitter_user_secret": helpers._config("TT_TWITTER_TOKEN_SECRET",
                                               config),
        # "yes" (any case) enables URL stripping; anything else leaves it off.
        "strip_urls": helpers._config("TT_STRIP_URLS",
                                      config).lower() == "yes",
    }

    try:
        tweettoot.TweetToot(**params).relay()
    except Exception as e:
        # Log and swallow so the caller's schedule keeps running.
        logger.critical(e)
        traceback.print_exc()

    return True
def getTweets():
    """
    Get list of tweets, with tweet ID and content, from the configured
    Twitter account URL.

    Uses BeautifulSoup to extract the tweet IDs and text of all tweets
    on the specified page.

    Returns:
        list[dict] -- one {"id": ..., "text": ...} entry per tweet,
        None if the page contained no usable tweets, or False on
        configuration / retrieval errors.
    """
    all_tweets = []
    url = helpers._config("tweets.source_account_url")

    if not url:
        helpers._error(
            f"getTweets() => The source Twitter account URL ({url}) was incorrect. Could not retrieve tweets."
        )
        return False

    headers = {}
    headers["accept-language"] = "en-US,en;q=0.9"
    headers["dnt"] = "1"
    headers["user-agent"] = helpers._config("gen.APP_NAME")

    # Bug fix: the headers were previously built but never sent.
    data = requests.get(url, headers=headers)
    html = BeautifulSoup(data.text, "html.parser")
    timeline = html.select("#timeline li.stream-item")

    # select() returns a list, never None — check for emptiness instead
    # (the old `is None` test could never fire).
    if not timeline:
        helpers._error(
            f"getTweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ({url}) is correct."
        )
        return False

    helpers._info(f"getTweets() => Fetched tweets for {url}.")

    for tweet in timeline:
        tweet_id = tweet["data-item-id"]

        try:
            tweet_text = tweet.select("p.tweet-text")[0].get_text()
        except Exception:
            # No tweet text node in this stream item; skip it.
            helpers._info("getTweets() => No tweet text found. Moving on...")
            continue

        all_tweets.append({"id": tweet_id, "text": tweet_text})

    return all_tweets if len(all_tweets) > 0 else None
def _get_timestamp_file_path(self):
    """ Return the path of the file that caches the last tweet timestamp.

    The file name embeds a SHA-1 digest of the Twitter and Mastodon URLs
    so that each relay pair gets its own state file.

    :type self:
    :param self:
    :raises:
    :rtype: str
    """
    cache_dir = helpers._config("TT_CACHE_PATH")
    pair_key = (self.twitter_url.encode("utf-8") +
                self.mastodon_url.encode("utf-8"))
    return cache_dir + "tt_" + sha1(pair_key).hexdigest()
def runJob():
    """Run one relay pass for every configured account pair.

    Always returns True; per-pair failures are logged, not raised.
    """
    for config in configs:

        # "yes" (any case) turns a feature flag on; anything else is off.
        def flag(key):
            return helpers._config(key, config).lower() == "yes"

        # Assemble the constructor arguments straight from this pair's config.
        params = {
            "app_name": helpers._config("TT_APP_NAME", config),
            "twitter_url": helpers._config("TT_SOURCE_TWITTER_URL", config),
            "mastodon_url": helpers._config("TT_HOST_INSTANCE", config),
            "mastodon_token": helpers._config("TT_APP_SECURE_TOKEN", config),
            "twitter_user_id": helpers._config("TT_TWITTER_USER_ID", config),
            "twitter_api_key": helpers._config("TT_TWITTER_CONSUMER_KEY",
                                               config),
            "twitter_api_secret": helpers._config(
                "TT_TWITTER_CONSUMER_SECRET", config),
            "twitter_user_key": helpers._config("TT_TWITTER_TOKEN", config),
            "twitter_user_secret": helpers._config("TT_TWITTER_TOKEN_SECRET",
                                                   config),
            "tweet_amount": helpers._config("TT_NUMBER_OF_TWEETS", config),
            "strip_urls": flag("TT_STRIP_URLS"),
            "include_rts": flag("TT_INCLUDE_RTS"),
            "misskey": flag("TT_MISSKEY"),
        }

        try:
            tweettoot.TweetToot(**params).relay()
        except Exception as e:
            # Log and continue with the next configured pair.
            logger.critical(e)
            traceback.print_exc()

    return True
import traceback # Initialize common logging options logger = logging.getLogger(__name__) logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", ) configs = [] with open("./config_files") as file: for line in file: line = line.strip() configs.append(line) every_x_minutes = helpers._config("TT_RUN_EVERY_X_MINUTES", "sysconfig.json") def runJob(): for config in configs: # Initialize variables app_name = helpers._config("TT_APP_NAME", config) twitter_url = helpers._config("TT_SOURCE_TWITTER_URL", config) mastodon_url = helpers._config("TT_HOST_INSTANCE", config) mastodon_token = helpers._config("TT_APP_SECURE_TOKEN", config) twitter_user_id = helpers._config("TT_TWITTER_USER_ID", config) twitter_api_key = helpers._config("TT_TWITTER_CONSUMER_KEY", config) twitter_api_secret = helpers._config("TT_TWITTER_CONSUMER_SECRET", config) twitter_user_key = helpers._config("TT_TWITTER_TOKEN", config) twitter_user_secret = helpers._config("TT_TWITTER_TOKEN_SECRET",
def tootTheTweet(tweet):
    """
    Receieve a dictionary containing Tweet ID and text... and TOOT!

    Posts the tweet text to the configured Mastodon instance via the
    requests library, using a per-tweet cache file to avoid posting the
    same tweet twice.

    Arguments:
    tweet {dictionary} -- Dictionary containing the "id" and "text" of a single tweet.

    Returns:
        bool -- True if the toot was posted, False otherwise.
    """
    host_instance = helpers._config('toots.host_instance')
    token = helpers._config('toots.app_secure_token')
    tweet_id = tweet['id']

    if not host_instance:
        # str() guards against host_instance being None in the message.
        helpers._error('tootTheTweet() => Your host Mastodon instance URL (' +
                       str(host_instance) + ') was incorrect.')
        return False

    if not token:
        helpers._error(
            'tootTheTweet() => Your Mastodon access token was incorrect.')
        return False

    headers = {}
    headers['Authorization'] = 'Bearer ' + token
    # Idempotency key lets Mastodon reject accidental duplicate posts.
    headers['Idempotency-Key'] = tweet_id

    data = {}
    data['status'] = tweet['text']
    data['visibility'] = 'public'

    tweet_check_file_path = helpers._config('toots.cache_path') + tweet['id']
    tweet_check_file = Path(tweet_check_file_path)

    if tweet_check_file.is_file():
        # Bug fix: the old message claimed "Reposting..." although this
        # branch actually skips tweets that were already posted.
        helpers._info('tootTheTweet() => Tweet ' + tweet_id +
                      ' was already posted. Skipping...')
        return False

    # Record the tweet before posting; the context manager guarantees the
    # cache file is closed (the old code leaked the handle and discarded a
    # no-op .encode() result).
    with open(tweet_check_file_path, mode='w', encoding='utf-8') as tweet_check:
        tweet_check.write(tweet['text'])

    helpers._info('tootTheTweet() => New tweet ' + tweet_id + ' => "' +
                  tweet['text'] + '".')

    response = requests.post(url=host_instance + '/api/v1/statuses',
                             data=data,
                             headers=headers)

    if response.status_code == 200:
        # Bug fix: added the missing space before "to Mastodon".
        helpers._info('tootTheTweet() => OK. Posted tweet ' + tweet_id +
                      ' to Mastodon.')
        helpers._info('tootTheTweet() => Response: ' + response.text)
        return True
    else:
        helpers._info('tootTheTweet() => FAIL. Could not post tweet ' +
                      tweet_id + ' to Mastodon.')
        helpers._info('tootTheTweet() => Response: ' + response.text)
        return False
def toot_the_tweet(tweet):
    """
    Receieve a dictionary containing Tweet ID, text and timestamp... and TOOT!

    Posts the tweet to the configured Mastodon instance via the requests
    library. A timestamp cache file prevents re-posting tweets that were
    already relayed.

    Arguments:
    tweet {dictionary} -- Dictionary containing the "id", "text" and "time" of a single tweet.

    Returns:
        True if the toot was posted, False on error (or when seeding the
        timestamp file on first run), None when there was nothing new.
    """
    host_instance = helpers._config("TT_HOST_INSTANCE")
    token = helpers._config("TT_APP_SECURE_TOKEN")
    timestamp_file = helpers._config("TT_CACHE_PATH") + "last_tweet_tooted"

    if not host_instance:
        logger.error(
            "toot_the_tweet() => Your host Mastodon instance URL ({}) was incorrect."
            .format(host_instance))
        return False

    if not token:
        logger.error(
            "toot_the_tweet() => Your Mastodon access token was incorrect.")
        return False

    last_timestamp = helpers._read_file(timestamp_file)

    # First run: seed the timestamp file and post nothing, so the bot
    # does not flood the instance with the whole visible timeline.
    if not last_timestamp:
        helpers._write_file(timestamp_file, str(tweet["time"]))
        return False

    last_timestamp = int(last_timestamp)

    headers = {}
    headers["Authorization"] = "Bearer {}".format(token)
    # Idempotency key lets Mastodon reject accidental duplicate posts.
    headers["Idempotency-Key"] = tweet["id"]

    data = {}
    data["status"] = tweet["text"]
    data["visibility"] = "public"

    if tweet["time"] <= last_timestamp:
        logger.info("toot_the_tweet() => No new tweets. Moving on.")
        return None

    # Bug fix: the write's return value was previously assigned back into
    # last_timestamp, clobbering the integer for no reason.
    # NOTE(review): the timestamp is still advanced before the post is
    # attempted, so a failed post is never retried — confirm intentional.
    helpers._write_file(timestamp_file, str(tweet["time"]))

    logger.info('toot_the_tweet() => New tweet {} => "{}".'.format(
        tweet["id"], tweet["text"]))

    response = requests.post(url="{}/api/v1/statuses".format(host_instance),
                             data=data,
                             headers=headers)

    if response.status_code == 200:
        logger.info(
            "toot_the_tweet() => OK. Posted tweet {} to Mastodon.".format(
                tweet['id']))
        logger.info("toot_the_tweet() => Response: {}".format(response.text))
        return True
    else:
        logger.info(
            "toot_the_tweet() => FAIL. Could not post tweet {} to Mastodon.".
            format(tweet['id']))
        logger.info("toot_the_tweet() => Response: {}".format(response.text))
        return False
def tootTheTweet(tweet):
    """
    Receieve a dictionary containing Tweet ID and text... and TOOT!

    Posts the tweet text to the configured Mastodon instance via the
    requests library, using a per-tweet cache file to avoid posting the
    same tweet twice.

    Arguments:
    tweet {dictionary} -- Dictionary containing the "id" and "text" of a single tweet.

    Returns:
        bool -- True if the toot was posted, False otherwise.
    """
    host_instance = helpers._config("toots.host_instance")
    token = helpers._config("toots.app_secure_token")
    tweet_id = tweet["id"]

    if not host_instance:
        helpers._error(
            f"tootTheTweet() => Your host Mastodon instance URL ({host_instance}) was incorrect."
        )
        return False

    if not token:
        helpers._error(
            "tootTheTweet() => Your Mastodon access token was incorrect.")
        return False

    headers = {}
    headers["Authorization"] = f"Bearer {token}"
    # Idempotency key lets Mastodon reject accidental duplicate posts.
    headers["Idempotency-Key"] = tweet_id

    data = {}
    data["status"] = tweet["text"]
    data["visibility"] = "public"

    tweet_check_file_path = helpers._config("toots.cache_path") + tweet["id"]
    tweet_check_file = Path(tweet_check_file_path)

    if tweet_check_file.is_file():
        # Bug fix: the old message claimed "Reposting..." although this
        # branch actually skips tweets that were already posted.
        helpers._info(
            f"tootTheTweet() => Tweet {tweet_id} was already posted. Skipping..."
        )
        return False

    # Record the tweet before posting; the context manager guarantees the
    # cache file is closed (the old code leaked the handle and discarded a
    # no-op .encode() result).
    with open(tweet_check_file_path, mode="w",
              encoding="utf-8") as tweet_check:
        tweet_check.write(tweet["text"])

    helpers._info(
        f'tootTheTweet() => New tweet {tweet_id} => "{tweet["text"]}".')

    response = requests.post(url=f"{host_instance}/api/v1/statuses",
                             data=data,
                             headers=headers)

    if response.status_code == 200:
        helpers._info(
            f"tootTheTweet() => OK. Posted tweet {tweet_id} to Mastodon.")
        helpers._info(f"tootTheTweet() => Response: {response.text}")
        return True
    else:
        helpers._info(
            f"tootTheTweet() => FAIL. Could not post tweet {tweet_id} to Mastodon."
        )
        helpers._info(f"tootTheTweet() => Response: {response.text}")
        return False
if __name__ == "__main__": """ It all starts here... This function will get a new Tweet from the configured Twitter account and publish to the configured Mastodon instance. It will only toot once per invokation to avoid flooding the instance. """ # Initialize common logging options logger = logging.getLogger(__name__) logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) # Initialize variables app_name = helpers._config("TT_APP_NAME") separator = "," twitter_url = helpers._config("TT_SOURCE_TWITTER_URL").split(separator) mastodon_url = helpers._config("TT_HOST_INSTANCE").split(separator) mastodon_token = helpers._config("TT_APP_SECURE_TOKEN").split(separator) cache_path = helpers._config("TT_CACHE_PATH") mode = helpers._config("TT_MODE") if len(mastodon_url) != len(mastodon_token): logger.error( f"Lenghts of Mastodon URL ({len(mastodon_url)}) and Mastodon tokens ({len(mastodon_url)}) do not match." ) else:
def getTweets(twitter_nametopull, mastodon_secret, mastodon_host):
    """
    Get list of tweets, with tweet ID and content, from the Twitter
    account URL passed in `twitter_nametopull`.

    Relies on BeautifulSoup to scrape the tweet IDs and content of the
    first few tweets on the page. Pinned tweets are detected and
    skipped, retweets are prefixed with the retweet banner text, and
    each accepted tweet's text is decorated with a source link and a
    bot signature.

    Arguments:
    twitter_nametopull -- Twitter account URL to scrape.
    mastodon_secret -- unused here; presumably kept for call-site compatibility.
    mastodon_host -- unused here; presumably kept for call-site compatibility.

    Returns:
        list of {"id", "text"} dicts for accepted tweets, None if none
        were accepted, or False on configuration errors.
    """
    all_tweets = []
    # Items inspected is max+1: the extra slot covers a possible pinned
    # tweet at the top of the timeline, which gets skipped.
    tweet_count_max = 1  # set me yes

    # Legacy value from config.json, immediately overridden by the argument.
    url = helpers._config('tweets.source_account_url')
    url = twitter_nametopull

    if not url:
        helpers._error('getTweets() => The source Twitter account URL (' +
                       url + ') was incorrect. Could not retrieve tweets.')
        return False

    headers = {}
    headers['accept-language'] = 'en-US,en;q=0.9'
    headers['dnt'] = '1'
    headers['user-agent'] = helpers._config('gen.APP_NAME')

    # Fetch the user's timeline page.
    # NOTE(review): the headers built above are never passed to the request.
    data = requests.get(url)
    html = BeautifulSoup(data.text, 'html.parser')
    timeline = html.select('#timeline li.stream-item')

    # NOTE(review): select() returns a list, so this check never fires.
    if timeline is None:
        helpers._error(
            'getTweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ('
            + url + ') is correct.')
        return False

    helpers._info('getTweets() => Fetched tweets for ' + url + '.')

    tweet_count_loop = 0
    tweet_error = 0

    for tweet in timeline:
        tweet_skip = 0
        # Only the first tweet_count_max+1 stream items are considered
        # (see the pinned-tweet note on tweet_count_max above).
        if (tweet_error == 0) and (tweet_count_loop <= (tweet_count_max)):
            tweet_count_loop = tweet_count_loop + 1
            tweet_id = tweet['data-item-id']
            # Idempotency key is meant to let Mastodon reject duplicates.
            # NOTE(review): it is set on the scrape headers, which are
            # never sent anywhere.
            headers['Idempotency-Key'] = tweet_id
            tweet_text = []
            retweet_text = []
            tweet_url = []
            tweet_datetimestamp = []
            tweet_url = url + '/status/' + tweet_id

            # Pinned tweets carry a span.js-pinned-text banner; a missing
            # banner raises IndexError and leaves tweet_ispinned falsy.
            try:
                tweet_ispinned = retweet_text = tweet.select(
                    'span.js-pinned-text')[0].get_text()
                helpers._info(
                    'getTweets() => This tweet is a pinned tweet. Skipping')
            except:
                tweet_ispinned = []

            if (tweet_ispinned):
                tweet_skip = 1

            try:
                tweet_text = tweet.select('p.tweet-text')[0].get_text()
                # Human-readable timestamp from the permalink's title attr.
                tweet_datetimestamp = tweet.select('a.tweet-timestamp')[0]
                tweet_datetimestamp = tweet_datetimestamp['title']

                try:
                    # The retweet banner is only used to detect retweets and
                    # build the "<banner>: <text>" prefix below.
                    retweet_text = tweet.select(
                        'span.js-retweet-text')[0].get_text()
                    retweet_text_itself = tweet.select(
                        'span.js-retweet-text')[0].get_text()
                    # NOTE(review): likely not extracting anything useful —
                    # 'div.data-screen-name' looks like an attribute name,
                    # not a class; confirm against the scraped markup.
                    retweet_originaltweeter = tweet.select(
                        'div.data-screen-name')[0].get_text()
                except:
                    retweet_text = []
                    retweet_text_itself = []

                if retweet_text:
                    tweet_text = retweet_text_itself.strip(
                    ) + ':\n ' + tweet_text.strip()
                    helpers._info(
                        'getTweets() => Is Retweet!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
                    )
            except:
                helpers._info(
                    'getTweets() => No tweet text found. Moving on...')
                continue

            # Clean up and decorate the text of a usable tweet.
            if tweet_text:
                # Force links onto their own line (chained replaces; a
                # regex would be more robust).
                tweet_text = tweet_text.replace('<a href', ' \n<a href')
                tweet_text = tweet_text.replace(
                    'pic.twitter.com', ' \nhttps://pic.twitter.com')
                tweet_text = tweet_text.replace('http', ' \nhttp')
                # Spot-rewrite of one specific tweet's wording.
                tweet_text = tweet_text.replace('reassuring.',
                                                'reassuring. (BOT REPOST)')
                helpers._info('getTweets() => =============================' +
                              tweet['data-item-id'] + '\n\n')
                # Append source permalink, timestamp and bot signature.
                tweet_text = tweet_text + ' \n\nSource: ' + tweet_url + ' ' + tweet_datetimestamp
                tweet_text = tweet_text + '\n\nEND==================\n\nMy other bots: https://pastebin.com/yuwXfDjZ'
                helpers._info('getTweets() => TWEET TEXT--> ' + tweet_text)

                if (tweet_error == 0) and (tweet_skip == 0):
                    all_tweets.append({"id": tweet_id, "text": tweet_text})
                else:
                    # Pinned (or flagged) tweets are dropped here.
                    helpers._info(
                        'getTweets() => Not adding tweet: Either exists and error or exists and Skipping on purpose'
                    )

    return all_tweets if len(all_tweets) > 0 else None
def relay(self):
    """
    Main code which relays tweets to the Mastodon instance.

    Validates the instance configuration, fetches new tweets via
    _get_tweets(), uploads any attached media to Mastodon's media
    endpoint, toots each tweet through _toot_the_tweet(), and finally
    persists the newest tweet timestamp seen so the next run only
    relays newer tweets.

    :type self:
    :param self:
    :raises:
    :rtype: bool
    """
    # Bail out early on any missing piece of configuration.
    if not self.app_name:
        logger.error(
            f"relay() => Application name in config is incorrect/empty.")
        return False

    if not self.twitter_url:
        logger.error(
            f"relay() => Twitter URL in config is incorrect/empty.")
        return False

    if not self.mastodon_url:
        logger.error(
            f"relay() => Mastodon URL in config is incorrect/empty.")
        return False

    if not self.mastodon_token:
        logger.error(
            f"relay() => Mastodon token in config is incorrect/empty.")
        return False

    logger.info(
        f"relay() => Init relay from {self.twitter_url} to {self.mastodon_url}. State file {self._get_timestamp_file_path()}"
    )

    # _get_tweets() returns a dict keyed by tweet timestamp; a falsy
    # result means nothing new, which still counts as success.
    tweets = self._get_tweets()

    if not tweets:
        return True

    logger.debug(f"relay() => {str(tweets)}")

    last_timestamp = 0

    for tweet_time, tweet in tweets.items():
        logger.info(
            f"relay() => Tweeting {tweet['id']} to {self.mastodon_url}")

        # Track the newest timestamp seen across all relayed tweets.
        last_timestamp = (tweet_time
                          if tweet_time > last_timestamp else last_timestamp)

        # "null" is the sentinel _get_tweets() uses for "no media".
        if tweet["img"] != "null":
            # Download the image locally, then upload it to Mastodon's
            # media endpoint to obtain a media id for the status.
            img_u = tweet["img"]
            tweet_id = tweet["id"]
            d_path = helpers._config("TT_CACHE_PATH") + "img_" + tweet_id
            urllib.request.urlretrieve(img_u, d_path)
            headers = {}
            headers["Authorization"] = f"Bearer {self.mastodon_token}"
            # NOTE(review): this file handle is never explicitly closed.
            file = {'file': open(d_path, 'rb')}
            m_response = requests.post(
                url=f"{self.mastodon_url}/api/v1/media",
                files=file,
                headers=headers)

            if m_response.status_code == 200:
                logger.info(
                    f"toot_the_tweet() => OK. Tooted {tweet_id}'s media' to {self.mastodon_url}."
                )
                logger.debug(
                    f"toot_the_tweet() => Response: {m_response.text}")
                m_id = m_response.json()["id"]
            else:
                # Upload failed: fall back to tooting without media.
                logger.error(
                    f"toot_the_tweet() => Could not toot {tweet_id}'s media' to {self.mastodon_url}."
                )
                logger.error(
                    f"toot_the_tweet() => Response: {m_response.text}")
                m_id = "null"

            self._toot_the_tweet(mastodon_url=self.mastodon_url,
                                 tweet_id=tweet["id"],
                                 tweet_body=tweet["text"],
                                 tweet_time=tweet_time,
                                 media_id=m_id)
            # Remove the locally cached image once it has been uploaded.
            os.remove(d_path)
        else:
            self._toot_the_tweet(mastodon_url=self.mastodon_url,
                                 tweet_id=tweet["id"],
                                 tweet_body=tweet["text"],
                                 tweet_time=tweet_time,
                                 media_id="null")

    # Persist the newest timestamp for the next run.
    self._set_last_timestamp(timestamp=last_timestamp)
def _get_tweets(self):
    """
    Get list of new tweets, with tweet ID and content, from the
    configured Twitter account URL.

    Relies on BeautifulSoup to scrape the page. Only tweets newer than
    the cached last timestamp are returned. Linked photos/videos are
    resolved to a direct media URL via the mobile page, retweet and
    reply context is fetched from the original tweet's page, and in
    many-to-* modes the author name is prefixed to the text.

    :type self:
    :param self:
    :raises:
    :rtype: dict -- tweet data keyed by timestamp, newest first, or
        None when nothing new was found.
    """
    tweets = OrderedDict()
    last_timestamp = self._get_last_timestamp()

    headers = {}
    headers["accept-language"] = "en-US,en;q=0.9"
    headers["dnt"] = "1"
    headers["user-agent"] = self.app_name

    # NOTE(review): the headers above are never passed to the request.
    data = requests.get(self.twitter_url)
    html = BeautifulSoup(data.text, "html.parser")
    timeline = html.select("div.tweet-text")
    tweet_body = html.select("table.tweet")
    count = 0

    # NOTE(review): select() returns a list, so this check never fires;
    # an empty page simply yields no tweets below.
    if timeline is None:
        logger.error(
            f"get_tweets() => Could not retrieve tweets from the page. Please make sure the source Twitter URL ({self.twitter_url}) is correct."
        )
        return False

    logger.info(
        f"get_tweets() => Fetched {len(timeline)} tweets for {self.twitter_url}."
    )

    for tweet in timeline:
        try:
            # "data-id" is the tweet's numeric id; being monotonically
            # increasing, it doubles as the timestamp ordering key here.
            tweet_time = int(tweet.attrs["data-id"])
            if tweet_time > last_timestamp:
                tweet_id = tweet.attrs["data-id"]
                tweet_text = tweet.select("div > div")[0].get_text()

                # Replace shortened link text with the real URLs.
                a_tags = tweet.select("a.twitter_external_link")
                tweet_img = "null"
                if len(a_tags) > 0:
                    for at in a_tags:
                        url = f'{at["data-url"]} '
                        at = at.get_text()
                        tweet_text = str(tweet_text).replace(str(at), url)
                        # Remember the raw link for the context blocks below.
                        ori = url

                        if "https://twitter.com/" in ori and "/photo/" in ori:
                            # Resolve a /photo/ link to the actual image URL
                            # via the mobile page.
                            url = ori
                            url = url.replace("twitter.com",
                                              "mobile.twitter.com")
                            pattern = re.compile(
                                r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                            )
                            url = re.findall(pattern, url)
                            url = url[0]
                            img_src = requests.get(url)
                            img_html = BeautifulSoup(
                                img_src.text, "html.parser")
                            img = img_html.select("div.media")
                            img = str(img)
                            img_url = re.findall(pattern, img)
                            tweet_img = img_url[0]
                            tweet_text = str(tweet_text).replace(
                                str(ori), "")
                            # Sensitive media cannot be fetched; leave the
                            # link in the text instead.
                            if "support.twitter.com" in tweet_img:
                                tweet_img = "null"
                                tweet_text = tweet_text + ori + "\n This media is marked as sensitive, follow the link above to view."

                        if "https://twitter.com/" in ori and "/video/" in ori:
                            # Same resolution dance for /video/ links.
                            url = ori
                            url = url.replace("twitter.com",
                                              "mobile.twitter.com")
                            pattern = re.compile(
                                r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                            )
                            url = re.findall(pattern, url)
                            url = url[0]
                            img_src = requests.get(url)
                            img_html = BeautifulSoup(
                                img_src.text, "html.parser")
                            img = img_html.select("div.media")
                            img = str(img)
                            img_url = re.findall(pattern, img)
                            tweet_img = img_url[0]
                            tweet_text = str(tweet_text).replace(
                                str(ori), "")
                            if "support.twitter.com" in tweet_img:
                                tweet_img = "null"
                                tweet_text = tweet_text + ori + "\n This media is marked as sensitive, follow the link above to view."

                        if "https://twitter.com/" in ori and "/status/" in ori:
                            # Plain /status/ links (quote/reply targets);
                            # /photo/ and /video/ were handled above.
                            if "/video/" in ori:
                                print("")
                            else:
                                if "/photo/" in ori:
                                    print("")
                                else:
                                    # Fetch the linked tweet and inline its
                                    # author and text as quoted context.
                                    url = ori
                                    url = url.replace(
                                        "twitter.com", "mobile.twitter.com")
                                    pattern = re.compile(
                                        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                                    )
                                    url = re.findall(pattern, url)
                                    url = url[0]
                                    ori_tweet = requests.get(url)
                                    ori_html = BeautifulSoup(
                                        ori_tweet.text, "html.parser")
                                    ori_post = ori_html.select(
                                        "div.tweet-text")[0]
                                    ori_auth = ori_html.select(
                                        "div.fullname")[0]
                                    ori_post = str(ori_post.get_text())
                                    ori_auth = str(ori_auth.get_text())
                                    ori_post = ori_post.replace("\n", "")
                                    ori_auth = ori_auth.replace("\n", "")
                                    tweet_text = "Retweeted and replied to " + ori_auth + "'s tweet\n(" + ori_post + ")\nAbove is original post\n" + tweet_text
                                    tweet_text = str(tweet_text).replace(
                                        str(ori), "")

                # Retweet banner: prefix the original author's name.
                if tweet_body[count].select("span.context"):
                    tweet_context = tweet_body[count].select(
                        "span.context")[0]
                    tweet_context = tweet_context.get_text()
                    ori_author = tweet_body[count].select(
                        "strong.fullname")[0]
                    ori_author = str(ori_author.get_text())
                    tweet_text = "Retweeted " + ori_author + "'s tweet: \n" + tweet_text

                # Reply banner: pull in the tweet being replied to.
                if tweet_body[count].select("div.tweet-reply-context"):
                    re_context = tweet_body[count].select(
                        "div.tweet-reply-context")[0]
                    re_context = str(re_context.get_text())
                    re_context = re_context.replace("\n", "")
                    # NOTE(review): `ori` is only bound when the tweet had
                    # external links (or by a previous iteration); a
                    # NameError here is swallowed by the outer except.
                    if ori:
                        url = ori
                        url = url.replace("twitter.com",
                                          "mobile.twitter.com")
                        pattern = re.compile(
                            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                        )
                        url = re.findall(pattern, url)
                        url = url[0]
                        ori_tweet = requests.get(url)
                        ori_html = BeautifulSoup(ori_tweet.text,
                                                 "html.parser")
                        ori_post = ori_html.select("div.tweet-text")[0]
                        ori_auth = ori_html.select("div.fullname")[0]
                        ori_post = str(ori_post.get_text())
                        ori_auth = str(ori_auth.get_text())
                        ori_post = ori_post.replace("\n", "")
                        ori_auth = ori_auth.replace("\n", "")
                        tweet_text = str(tweet_text).replace(str(ori), "")
                        tweet_text = re_context + "\n(" + ori_post + ")\nAbove is original post\n" + tweet_text
                    else:
                        tweet_text = re_context + "\n" + tweet_text

                # Reset the link marker for the next tweet.
                ori = None
                # NOTE(review): count only advances for *new* tweets, so
                # tweet_body[count] may misalign when old tweets are on
                # the page before new ones — confirm against live markup.
                count += 1

                # In many-to-* modes, prefix the author so the reader can
                # tell which source account the toot came from.
                if helpers._config(
                        "TT_MODE") == "many-to-one" or helpers._config(
                            "TT_MODE") == "many-to-many":
                    author = html.select("table.profile-details"
                                         )[0].select("div.fullname")[0]
                    user = str(author.get_text())
                    user = user.replace("\n", "")
                    tweet_text = user + ' said: \n' + tweet_text

                tweets[tweet_time] = {
                    "id": tweet_id,
                    "text": tweet_text,
                    "img": tweet_img
                }
        except Exception as e:
            # Any scraping hiccup skips just this tweet.
            logger.error("get_tweets() => An error occurred.")
            logger.error(e)
            continue

    # Newest first.
    return ({k: tweets[k] for k in sorted(tweets, reverse=True)}
            if len(tweets) > 0 else None)