Code example #1
File: social.py  Project: raketenlurch/tweet-toot
def getTweets():
    """ Get list of tweets, with tweet ID and content, from configured Twitter account URL.

    This function relies on BeautifulSoup to extract the tweet IDs and content of all tweets on the specified page.

    The data is returned as a list of dictionaries that can be used by other functions.
    """

    all_tweets = []

    url = helpers._config('tweets.source_account_url')

    if not url:

        helpers._error('getTweets() => The source Twitter account URL (' +
                       url + ') was incorrect. Could not retrieve tweets.')

        return False

    headers = {}
    headers['accept-language'] = 'en-US,en;q=0.9'
    headers['dnt'] = '1'
    headers['user-agent'] = helpers._config('gen.APP_NAME')

    data = requests.get(url, headers=headers)

    html = BeautifulSoup(data.text, 'html.parser')

    timeline = html.select('#timeline li.stream-item')

    if not timeline:

        helpers._error(
            'getTweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ('
            + url + ') is correct.')

        return False

    helpers._info('getTweets() => Fetched tweets for ' + url + '.')

    for tweet in timeline:

        tweet_id = tweet['data-item-id']

        try:

            tweet_text = tweet.select('p.tweet-text')[0].get_text()

        except:

            helpers._info('getTweets() => No tweet text found. Moving on...')

            continue

        all_tweets.append({"id": tweet_id, "text": tweet_text})

    return all_tweets if len(all_tweets) > 0 else None
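
Taken together with tootTheTweet() shown later in example #8, a minimal sketch of how this return value might be consumed. The driver below is illustrative only and is not part of the project:

# Hypothetical driver, assuming getTweets() and tootTheTweet() live in social.py.
import social

def relay_once():
    tweets = social.getTweets()
    if not tweets:
        return False
    # Post the oldest fetched tweet first so timeline order is preserved.
    for tweet in reversed(tweets):
        if social.tootTheTweet(tweet):
            return True
    return False

if __name__ == "__main__":
    relay_once()
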
Code example #2
def get_tweets():
    """ Get list of tweets, with tweet ID and content, from configured Twitter account URL.

    This function relies on BeautifulSoup to extract the tweet IDs and content of all tweets on the specified page.

    The data is returned as a list of dictionaries that can be used by other functions.
    """

    all_tweets = []
    url = helpers._config("TT_SOURCE_TWITTER_URL")

    if not url:
        logger.error(
            "get_tweets() => The source Twitter account URL ({}) was incorrect. Could not retrieve tweets."
            .format(url))
        return False

    headers = {}
    headers["accept-language"] = "en-US,en;q=0.9"
    headers["dnt"] = "1"
    headers["user-agent"] = helpers._config("TT_APP_NAME")

    data = requests.get(url, headers=headers)
    html = BeautifulSoup(data.text, "html.parser")
    timeline = html.select("#timeline li.stream-item")

    if not timeline:
        logger.error(
            "get_tweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ({}) is correct."
            .format(url))
        return False

    logger.info("get_tweets() => Fetched tweets for {}.".format(url))

    for tweet in timeline:

        try:

            tweet_id = tweet["data-item-id"]
            tweet_text = tweet.select("p.tweet-text")[0].get_text().encode(
                "utf-8")
            tweet_time = int(
                tweet.select("span._timestamp")[0].attrs["data-time-ms"])

            all_tweets.append({
                "id": tweet_id,
                "text": tweet_text,
                "time": tweet_time
            })

        except Exception as e:

            logger.error("get_tweets() => No tweet text found.")
            logger.error(e)
            continue

    return all_tweets if len(all_tweets) > 0 else None
Code example #3
File: run.py  Project: animeavi/tweet_fedi_reposter
def runJob(tweet_url):
    config = "config.json"

    # Initialize variables
    app_name = helpers._config("TT_APP_NAME", config)
    twitter_url = tweet_url
    mastodon_url = helpers._config("TT_HOST_INSTANCE", config)
    mastodon_token = helpers._config("TT_APP_SECURE_TOKEN", config)
    twitter_api_key = helpers._config("TT_TWITTER_CONSUMER_KEY", config)
    twitter_api_secret = helpers._config("TT_TWITTER_CONSUMER_SECRET", config)
    twitter_user_key = helpers._config("TT_TWITTER_TOKEN", config)
    twitter_user_secret = helpers._config("TT_TWITTER_TOKEN_SECRET", config)

    strip_urls = False
    if (helpers._config("TT_STRIP_URLS", config).lower() == "yes"):
        strip_urls = True

    try:
        job = tweettoot.TweetToot(
            app_name=app_name,
            twitter_url=twitter_url,
            mastodon_url=mastodon_url,
            mastodon_token=mastodon_token,
            twitter_api_key=twitter_api_key,
            twitter_api_secret=twitter_api_secret,
            twitter_user_key=twitter_user_key,
            twitter_user_secret=twitter_user_secret,
            strip_urls=strip_urls,
        )
        job.relay()
    except Exception as e:
        logger.critical(e)
        traceback.print_exc()

    return True
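
A minimal, hypothetical way to invoke this job from the command line; the argument handling below is an assumption, since the project's actual entry point is not shown here:

import sys

if __name__ == "__main__":
    # Expect the tweet URL as the first command-line argument.
    if len(sys.argv) > 1:
        runJob(sys.argv[1])
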
Code example #4
File: social.py  Project: vinyll/tweet-toot
def getTweets():
    """ Get list of tweets, with tweet ID and content, from configured Twitter account URL.

    This function relies on BeautifulSoup to extract the tweet IDs and content of all tweets on the specified page.

    The data is returned as a list of dictionaries that can be used by other functions.
    """

    all_tweets = []
    url = helpers._config("tweets.source_account_url")

    if not url:
        helpers._error(
            f"getTweets() => The source Twitter account URL ({url}) was incorrect. Could not retrieve tweets."
        )
        return False

    headers = {}
    headers["accept-language"] = "en-US,en;q=0.9"
    headers["dnt"] = "1"
    headers["user-agent"] = helpers._config("gen.APP_NAME")

    data = requests.get(url, headers=headers)
    html = BeautifulSoup(data.text, "html.parser")
    timeline = html.select("#timeline li.stream-item")

    if not timeline:
        helpers._error(
            f"getTweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ({url}) is correct."
        )
        return False

    helpers._info(f"getTweets() => Fetched tweets for {url}.")

    for tweet in timeline:
        tweet_id = tweet["data-item-id"]
        try:
            tweet_text = tweet.select("p.tweet-text")[0].get_text()
        except:
            helpers._info("getTweets() => No tweet text found. Moving on...")
            continue
        all_tweets.append({"id": tweet_id, "text": tweet_text})
    return all_tweets if len(all_tweets) > 0 else None
Code example #5
    def _get_timestamp_file_path(self):
        """ Get file path that stores tweet timestamp.

        :type self:
        :param self:

        :raises:

        :rtype: str
        """

        return (helpers._config("TT_CACHE_PATH") + "tt_" + sha1(
            self.twitter_url.encode("utf-8") +
            self.mastodon_url.encode("utf-8")).hexdigest())
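
The companion read and write of that state file are not included above. A rough sketch of what _get_last_timestamp() and _set_last_timestamp() (both referenced in examples #13 and #14) might look like, assuming helpers._read_file() and helpers._write_file() behave as they do in example #9:

    def _get_last_timestamp(self):
        """ Hypothetical sketch: return the timestamp of the last relayed tweet, or 0. """
        value = helpers._read_file(self._get_timestamp_file_path())
        return int(value) if value else 0

    def _set_last_timestamp(self, timestamp):
        """ Hypothetical sketch: persist the timestamp of the newest relayed tweet. """
        helpers._write_file(self._get_timestamp_file_path(), str(timestamp))
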
Code example #6
File: run.py  Project: animeavi/tweet-toot
def runJob():
    for config in configs:
        # Initialize variables
        app_name = helpers._config("TT_APP_NAME", config)
        twitter_url = helpers._config("TT_SOURCE_TWITTER_URL", config)
        mastodon_url = helpers._config("TT_HOST_INSTANCE", config)
        mastodon_token = helpers._config("TT_APP_SECURE_TOKEN", config)
        twitter_user_id = helpers._config("TT_TWITTER_USER_ID", config)
        twitter_api_key = helpers._config("TT_TWITTER_CONSUMER_KEY", config)
        twitter_api_secret = helpers._config("TT_TWITTER_CONSUMER_SECRET",
                                             config)
        twitter_user_key = helpers._config("TT_TWITTER_TOKEN", config)
        twitter_user_secret = helpers._config("TT_TWITTER_TOKEN_SECRET",
                                              config)
        tweet_amount = helpers._config("TT_NUMBER_OF_TWEETS", config)
        strip_urls = False
        include_rts = False
        misskey = False

        if (helpers._config("TT_STRIP_URLS", config).lower() == "yes"):
            strip_urls = True

        if (helpers._config("TT_INCLUDE_RTS", config).lower() == "yes"):
            include_rts = True

        if (helpers._config("TT_MISSKEY", config).lower() == "yes"):
            misskey = True

        try:
            job = tweettoot.TweetToot(app_name=app_name,
                                      twitter_url=twitter_url,
                                      mastodon_url=mastodon_url,
                                      mastodon_token=mastodon_token,
                                      twitter_user_id=twitter_user_id,
                                      twitter_api_key=twitter_api_key,
                                      twitter_api_secret=twitter_api_secret,
                                      twitter_user_key=twitter_user_key,
                                      twitter_user_secret=twitter_user_secret,
                                      strip_urls=strip_urls,
                                      include_rts=include_rts,
                                      tweet_amount=tweet_amount,
                                      misskey=misskey)
            job.relay()
        except Exception as e:
            logger.critical(e)
            traceback.print_exc()

    return True
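
Example #7 below shows the same runJob() together with a TT_RUN_EVERY_X_MINUTES setting, but the scheduling loop itself is cut off. One plausible way to drive the job on that interval, assuming the third-party schedule package; this is an illustration, not necessarily how the project does it:

import time
import schedule  # third-party package; assumed here for illustration

schedule.every(int(every_x_minutes)).minutes.do(runJob)

runJob()  # run once immediately, then on the configured interval
while True:
    schedule.run_pending()
    time.sleep(1)
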
Code example #7
File: run.py  Project: animeavi/tweet-toot
import traceback

# Initialize common logging options
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

configs = []
with open("./config_files") as file:
    for line in file:
        line = line.strip()
        configs.append(line)

every_x_minutes = helpers._config("TT_RUN_EVERY_X_MINUTES", "sysconfig.json")


def runJob():
    for config in configs:
        # Initialize variables
        app_name = helpers._config("TT_APP_NAME", config)
        twitter_url = helpers._config("TT_SOURCE_TWITTER_URL", config)
        mastodon_url = helpers._config("TT_HOST_INSTANCE", config)
        mastodon_token = helpers._config("TT_APP_SECURE_TOKEN", config)
        twitter_user_id = helpers._config("TT_TWITTER_USER_ID", config)
        twitter_api_key = helpers._config("TT_TWITTER_CONSUMER_KEY", config)
        twitter_api_secret = helpers._config("TT_TWITTER_CONSUMER_SECRET",
                                             config)
        twitter_user_key = helpers._config("TT_TWITTER_TOKEN", config)
        twitter_user_secret = helpers._config("TT_TWITTER_TOKEN_SECRET",
Code example #8
File: social.py  Project: raketenlurch/tweet-toot
def tootTheTweet(tweet):
    """ Receieve a dictionary containing Tweet ID and text... and TOOT!

    This function relies on the requests library to post the content to your Mastodon account (human or bot).

    A boolean success status is returned.

    Arguments:
        tweet {dictionary} -- Dictionary containing the "id" and "text" of a single tweet.
    """

    host_instance = helpers._config('toots.host_instance')
    token = helpers._config('toots.app_secure_token')

    tweet_id = tweet['id']

    if not host_instance:

        helpers._error('tootTheTweet() => Your host Mastodon instance URL (' +
                       host_instance + ') was incorrect.')

        return False

    if not token:

        helpers._error(
            'tootTheTweet() => Your Mastodon access token was incorrect.')

        return False

    headers = {}
    headers['Authorization'] = 'Bearer ' + token
    headers['Idempotency-Key'] = tweet_id

    data = {}
    data['status'] = tweet['text']
    data['visibility'] = 'public'

    tweet_check_file_path = helpers._config('toots.cache_path') + tweet['id']
    tweet_check_file = Path(tweet_check_file_path)
    if tweet_check_file.is_file():

        helpers._info('tootTheTweet() => Tweet ' + tweet_id +
                      ' was already posted. Skipping...')

        return False

    else:

        tweet['text'].encode('utf-8')

        tweet_check = open(tweet_check_file_path, mode='w')
        tweet_check.write(tweet['text'])
        tweet_check.close()

        helpers._info('tootTheTweet() => New tweet ' + tweet_id + ' => "' +
                      tweet['text'] + '".')

    response = requests.post(url=host_instance + '/api/v1/statuses',
                             data=data,
                             headers=headers)

    if response.status_code == 200:

        helpers._info('tootTheTweet() => OK. Posted tweet ' + tweet_id +
                      ' to Mastodon.')
        helpers._info('tootTheTweet() => Response: ' + response.text)

        return True

    else:

        helpers._info('tootTheTweet() => FAIL. Could not post tweet ' +
                      tweet_id + ' to Mastodon.')
        helpers._info('tootTheTweet() => Response: ' + response.text)

        return False
Code example #9
def toot_the_tweet(tweet):
    """ Receieve a dictionary containing Tweet ID and text... and TOOT!

    This function relies on the requests library to post the content to your Mastodon account (human or bot).

    A boolean success status is returned.

    Arguments:
        tweet {dictionary} -- Dictionary containing the "id" and "text" of a single tweet.
    """

    host_instance = helpers._config("TT_HOST_INSTANCE")
    token = helpers._config("TT_APP_SECURE_TOKEN")
    timestamp_file = helpers._config("TT_CACHE_PATH") + "last_tweet_tooted"

    if not host_instance:
        logger.error(
            "toot_the_tweet() => Your host Mastodon instance URL ({}) was incorrect."
            .format(host_instance))
        return False

    if not token:
        logger.error(
            "toot_the_tweet() => Your Mastodon access token was incorrect.")
        return False

    last_timestamp = helpers._read_file(timestamp_file)
    if not last_timestamp:

        helpers._write_file(timestamp_file, str(tweet["time"]))

        return False

    last_timestamp = int(last_timestamp)

    headers = {}
    headers["Authorization"] = "Bearer {}".format(token)
    headers["Idempotency-Key"] = tweet["id"]

    data = {}
    data["status"] = tweet["text"]
    data["visibility"] = "public"

    if tweet["time"] <= last_timestamp:

        logger.info("toot_the_tweet() => No new tweets. Moving on.")

        return None

    last_timestamp = helpers._write_file(timestamp_file, str(tweet["time"]))

    logger.info('toot_the_tweet() => New tweet {} => "{}".'.format(
        tweet["id"], tweet["text"]))

    response = requests.post(url="{}/api/v1/statuses".format(host_instance),
                             data=data,
                             headers=headers)

    if response.status_code == 200:
        logger.info(
            "toot_the_tweet() => OK. Posted tweet {} to Mastodon.".format(
                tweet['id']))
        logger.info("toot_the_tweet() => Response: {}".format(response.text))
        return True

    else:
        logger.info(
            "toot_the_tweet() => FAIL. Could not post tweet {} to Mastodon.".
            format(tweet['id']))
        logger.info("toot_the_tweet() => Response: {}".format(response.text))
        return False
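
This variant leans on helpers._read_file() and helpers._write_file() for its timestamp cache. A plausible sketch of those helpers, assuming they are thin wrappers around plain text files; the project's real helpers module is not reproduced here:

# Hypothetical file helpers; the actual helpers module may differ.
def _read_file(path):
    """ Return the file's contents as a stripped string, or None if it is missing. """
    try:
        with open(path) as f:
            return f.read().strip()
    except FileNotFoundError:
        return None

def _write_file(path, content):
    """ Write content to the file, creating or overwriting it, and return it. """
    with open(path, "w") as f:
        f.write(content)
    return content
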
Code example #10
File: social.py  Project: vinyll/tweet-toot
def tootTheTweet(tweet):
    """ Receieve a dictionary containing Tweet ID and text... and TOOT!

    This function relies on the requests library to post the content to your Mastodon account (human or bot).

    A boolean success status is returned.

    Arguments:
        tweet {dictionary} -- Dictionary containing the "id" and "text" of a single tweet.
    """

    host_instance = helpers._config("toots.host_instance")
    token = helpers._config("toots.app_secure_token")
    tweet_id = tweet["id"]

    if not host_instance:
        helpers._error(
            f"tootTheTweet() => Your host Mastodon instance URL ({host_instance}) was incorrect."
        )
        return False

    if not token:
        helpers._error(
            "tootTheTweet() => Your Mastodon access token was incorrect.")
        return False

    headers = {}
    headers["Authorization"] = f"Bearer {token}"
    headers["Idempotency-Key"] = tweet_id

    data = {}
    data["status"] = tweet["text"]
    data["visibility"] = "public"

    tweet_check_file_path = helpers._config("toots.cache_path") + tweet["id"]
    tweet_check_file = Path(tweet_check_file_path)
    if tweet_check_file.is_file():
        helpers._info(
            f"tootTheTweet() => Tweet {tweet_id} was already posted. Skipping..."
        )
        return False
    else:
        tweet["text"].encode("utf-8")

        tweet_check = open(tweet_check_file_path, mode="w")
        tweet_check.write(tweet["text"])
        tweet_check.close()

        helpers._info(
            f'tootTheTweet() => New tweet {tweet_id} => "{tweet["text"]}".')

    response = requests.post(url=f"{host_instance}/api/v1/statuses",
                             data=data,
                             headers=headers)

    if response.status_code == 200:
        helpers._info(
            f"tootTheTweet() => OK. Posted tweet {tweet_id} to Mastodon.")
        helpers._info(f"tootTheTweet() => Response: {response.text}")
        return True

    else:
        helpers._info(
            f"tootTheTweet() => FAIL. Could not post tweet {tweet_id} to Mastodon."
        )
        helpers._info(f"tootTheTweet() => Response: {response.text}")
        return False
Code example #11
if __name__ == "__main__":
    """ It all starts here...

    This script will get a new Tweet from the configured Twitter account and publish it to the configured Mastodon instance.
    It will only toot once per invocation to avoid flooding the instance.
    """

    # Initialize common logging options
    logger = logging.getLogger(__name__)
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    # Initialize variables
    app_name = helpers._config("TT_APP_NAME")
    separator = ","
    twitter_url = helpers._config("TT_SOURCE_TWITTER_URL").split(separator)
    mastodon_url = helpers._config("TT_HOST_INSTANCE").split(separator)
    mastodon_token = helpers._config("TT_APP_SECURE_TOKEN").split(separator)
    cache_path = helpers._config("TT_CACHE_PATH")
    mode = helpers._config("TT_MODE")

    if len(mastodon_url) != len(mastodon_token):

        logger.error(
            f"Lenghts of Mastodon URL ({len(mastodon_url)}) and Mastodon tokens ({len(mastodon_url)}) do not match."
        )

    else:
Code example #12
File: social.py  Project: joejoe/tweet-toot
def getTweets(twitter_nametopull, mastodon_secret, mastodon_host):
    """ Get list of tweets, with tweet ID and content, from configured Twitter account URL.

    This function relies on BeautifulSoup to extract the tweet IDs and content of all tweets on the specified page.

    The data is returned as a list of dictionaries that can be used by other functions.
    """

    all_tweets = []
    tweet_count_max = 1  # set me yes

    # old way from config.json file url = helpers._config('tweets.source_account_url')
    url = twitter_nametopull

    if not url:

        helpers._error('getTweets() => The source Twitter account URL (' +
                       url + ') was incorrect. Could not retrieve tweets.')

        return False

    headers = {}
    headers['accept-language'] = 'en-US,en;q=0.9'
    headers['dnt'] = '1'
    headers['user-agent'] = helpers._config('gen.APP_NAME')

    # Pull the user's timeline.
    data = requests.get(url, headers=headers)

    html = BeautifulSoup(data.text, 'html.parser')

    timeline = html.select('#timeline li.stream-item')

    if not timeline:

        helpers._error(
            'getTweets() => Could not retrieve tweets from the page. Please make sure the source Twitter account URL ('
            + url + ') is correct.')

        return False

    helpers._info('getTweets() => Fetched tweets for ' + url + '.')

    tweet_count_loop = 0
    tweet_error = 0

    for tweet in timeline:
        # print(tweet)
        tweet_skip = 0
        if (tweet_error == 0) and (
                tweet_count_loop <= (tweet_count_max)
        ):  #NOTE: tweet_count_max would be tweet_count_max MINUS 1 if you wanted to do it normally but we want the top 2 tweets in case the top one was PINNED as is a twitter feature and we would want to skip it
            tweet_count_loop = tweet_count_loop + 1
            # print(tweet)
            tweet_id = tweet['data-item-id']
            # supposed to let you dup post, as Mastodon will reject it if the same Idempotency-Key header is sent twice.... headers['Idempotency-Key'] = tweet_id

            tweet_text = []
            retweet_text = []
            tweet_url = []
            tweet_datetimestamp = []

            tweet_url = url + '/status/' + tweet_id

            try:
                tweet_ispinned = retweet_text = tweet.select(
                    'span.js-pinned-text')[0].get_text()
                helpers._info(
                    'getTweets() => This tweet is a pinned tweet. Skipping')
            except:
                tweet_ispinned = []

            if (tweet_ispinned):
                tweet_skip = 1

            try:

                tweet_text = tweet.select('p.tweet-text')[0].get_text()
                # tweet_datetimestamp = tweet.select('a.tweet-timestamp')[0].get_text()
                tweet_datetimestamp = tweet.select('a.tweet-timestamp')[0]
                tweet_datetimestamp = tweet_datetimestamp['title']
                # print(tweet)

                try:
                    # Only using to identify in the below ifthen since the tweet text seems to be the same. Not best way but too many scraping variables to keep track of.
                    retweet_text = tweet.select(
                        'span.js-retweet-text')[0].get_text()
                    retweet_text_itself = tweet.select(
                        'span.js-retweet-text')[0].get_text()
                # we are here. not pulling, I think .... retweet_originaltweeter=tweet.select('div.data-screen-name')[0].get_text()
                except:
                    retweet_text = []
                    retweet_text_itself = []

                if retweet_text:

                    tweet_text = retweet_text_itself.strip(
                    ) + ':\n ' + tweet_text.strip()

                    helpers._info(
                        'getTweets() => Is Retweet!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
                    )

                    #helpers._info('getTweets() =>' + retweet_text)
            except:

                helpers._info(
                    'getTweets() => No tweet text found. Moving on...')

                continue

            # cleanup if good tweet
            if tweet_text:
                tweet_text = tweet_text.replace(
                    '<a href', ' \n<a href'
                )  # should be regex this is sloppy in case they already had a space. Doesnt seem to be using this one either. Already stripped it appears
                tweet_text = tweet_text.replace(
                    'pic.twitter.com', ' \nhttps://pic.twitter.com'
                )  # should be regex this is sloppy
                tweet_text = tweet_text.replace(
                    'http', ' \nhttp'
                )  # should be regex this is sloppy in case they already had a space.
                # change up a specific tweet if you want tweet_text=tweet_text.replace('reassuring.','reassuring. (BOT REPOST)')
                helpers._info('getTweets() => =============================' +
                              tweet['data-item-id'] + '\n\n')
                tweet_text = tweet_text + ' \n\nSource: ' + tweet_url + ' ' + tweet_datetimestamp
                tweet_text = tweet_text + '\n\nEND==================\n\nMy other bots: https://pastebin.com/yuwXfDjZ'
                helpers._info('getTweets() => TWEET TEXT--> ' + tweet_text)
            if (tweet_error == 0) and (tweet_skip == 0):
                all_tweets.append({"id": tweet_id, "text": tweet_text})
            else:
                helpers._info(
                    'getTweets() => Not adding tweet: Either exists and error or exists and Skipping on purpose'
                )

    return all_tweets if len(all_tweets) > 0 else None
Code example #13
    def relay(self):
        """ Main code which relays tweets to the Mastodon instance.

        :type self:
        :param self:
    
        :raises:
    
        :rtype: bool
        """

        if not self.app_name:

            logger.error(
                f"relay() => Application name in config is incorrect/empty.")

            return False

        if not self.twitter_url:

            logger.error(
                f"relay() => Twitter URL in config is incorrect/empty.")

            return False

        if not self.mastodon_url:

            logger.error(
                f"relay() => Mastodon URL in config is incorrect/empty.")

            return False

        if not self.mastodon_token:

            logger.error(
                f"relay() => Mastodon token in config is incorrect/empty.")

            return False

        logger.info(
            f"relay() => Init relay from {self.twitter_url} to {self.mastodon_url}. State file {self._get_timestamp_file_path()}"
        )

        tweets = self._get_tweets()

        if not tweets:

            return True

        logger.debug(f"relay() => {str(tweets)}")

        last_timestamp = 0

        for tweet_time, tweet in tweets.items():

            logger.info(
                f"relay() => Tweeting {tweet['id']} to {self.mastodon_url}")

            last_timestamp = (tweet_time if tweet_time > last_timestamp else
                              last_timestamp)

            if tweet["img"] != "null":
                img_u = tweet["img"]
                tweet_id = tweet["id"]
                d_path = helpers._config("TT_CACHE_PATH") + "img_" + tweet_id
                urllib.request.urlretrieve(img_u, d_path)

                headers = {}
                headers["Authorization"] = f"Bearer {self.mastodon_token}"
                file = {'file': open(d_path, 'rb')}

                m_response = requests.post(
                    url=f"{self.mastodon_url}/api/v1/media",
                    files=file,
                    headers=headers)
                if m_response.status_code == 200:

                    logger.info(
                        f"toot_the_tweet() => OK. Tooted {tweet_id}'s media to {self.mastodon_url}."
                    )
                    logger.debug(
                        f"toot_the_tweet() => Response: {m_response.text}")
                    m_id = m_response.json()["id"]
                else:

                    logger.error(
                        f"toot_the_tweet() => Could not toot {tweet_id}'s media to {self.mastodon_url}."
                    )
                    logger.error(
                        f"toot_the_tweet() => Response: {m_response.text}")
                    m_id = "null"
                self._toot_the_tweet(mastodon_url=self.mastodon_url,
                                     tweet_id=tweet["id"],
                                     tweet_body=tweet["text"],
                                     tweet_time=tweet_time,
                                     media_id=m_id)
                os.remove(d_path)
            else:
                self._toot_the_tweet(mastodon_url=self.mastodon_url,
                                     tweet_id=tweet["id"],
                                     tweet_body=tweet["text"],
                                     tweet_time=tweet_time,
                                     media_id="null")

        self._set_last_timestamp(timestamp=last_timestamp)
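
relay() hands the actual posting off to self._toot_the_tweet(), which is not included above. A rough sketch of such a method, assuming the /api/v1/statuses endpoint used in examples #8 through #10 and the media ID obtained from /api/v1/media; names and behavior beyond what relay() passes are assumptions:

    def _toot_the_tweet(self, mastodon_url, tweet_id, tweet_body, tweet_time, media_id):
        """ Hypothetical sketch: post one tweet body, optionally with uploaded media. """
        headers = {
            "Authorization": f"Bearer {self.mastodon_token}",
            "Idempotency-Key": tweet_id,
        }
        data = {"status": tweet_body, "visibility": "public"}
        if media_id != "null":
            # Mastodon accepts media_ids[] as a form field on /api/v1/statuses.
            data["media_ids[]"] = media_id
        # tweet_time mirrors relay()'s call; the caller persists it via _set_last_timestamp().
        response = requests.post(
            url=f"{mastodon_url}/api/v1/statuses", data=data, headers=headers
        )
        return response.status_code == 200
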
Code example #14
    def _get_tweets(self):
        """ Get list of new tweets, with tweet ID and content, from configured Twitter account URL.
        This function relies on BeautifulSoup to extract the tweet IDs and content of all tweets on the specified page.
        The data is returned as a list of dictionaries that can be used by other functions.

        :type self:
        :param self:

        :raises:

        :rtype: dict
        """

        tweets = OrderedDict()
        last_timestamp = self._get_last_timestamp()

        headers = {}
        headers["accept-language"] = "en-US,en;q=0.9"
        headers["dnt"] = "1"
        headers["user-agent"] = self.app_name

        data = requests.get(self.twitter_url, headers=headers)
        html = BeautifulSoup(data.text, "html.parser")
        timeline = html.select("div.tweet-text")
        tweet_body = html.select("table.tweet")
        count = 0

        if not timeline:

            logger.error(
                f"get_tweets() => Could not retrieve tweets from the page. Please make sure the source Twitter URL ({self.twitter_url}) is correct."
            )
            return False

        logger.info(
            f"get_tweets() => Fetched {len(timeline)} tweets for {self.twitter_url}."
        )

        for tweet in timeline:

            try:

                tweet_time = int(tweet.attrs["data-id"])

                if tweet_time > last_timestamp:

                    tweet_id = tweet.attrs["data-id"]
                    tweet_text = tweet.select("div > div")[0].get_text()

                    # fix urls in links
                    a_tags = tweet.select("a.twitter_external_link")
                    tweet_img = "null"
                    if len(a_tags) > 0:
                        for at in a_tags:
                            url = f'{at["data-url"]} '
                            at = at.get_text()
                            tweet_text = str(tweet_text).replace(str(at), url)
                            ori = url
                            if "https://twitter.com/" in ori and "/photo/" in ori:
                                url = ori
                                url = url.replace("twitter.com",
                                                  "mobile.twitter.com")
                                pattern = re.compile(
                                    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                                )
                                url = re.findall(pattern, url)
                                url = url[0]
                                img_src = requests.get(url)
                                img_html = BeautifulSoup(
                                    img_src.text, "html.parser")
                                img = img_html.select("div.media")
                                img = str(img)
                                img_url = re.findall(pattern, img)
                                tweet_img = img_url[0]
                                tweet_text = str(tweet_text).replace(
                                    str(ori), "")
                                if "support.twitter.com" in tweet_img:
                                    tweet_img = "null"
                                    tweet_text = tweet_text + ori + "\n This media is marked as sensitive, follow the link above to view."
                            if "https://twitter.com/" in ori and "/video/" in ori:
                                url = ori
                                url = url.replace("twitter.com",
                                                  "mobile.twitter.com")
                                pattern = re.compile(
                                    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                                )
                                url = re.findall(pattern, url)
                                url = url[0]
                                img_src = requests.get(url)
                                img_html = BeautifulSoup(
                                    img_src.text, "html.parser")
                                img = img_html.select("div.media")
                                img = str(img)
                                img_url = re.findall(pattern, img)
                                tweet_img = img_url[0]
                                tweet_text = str(tweet_text).replace(
                                    str(ori), "")
                                if "support.twitter.com" in tweet_img:
                                    tweet_img = "null"
                                    tweet_text = tweet_text + ori + "\n This media is marked as sensitive, follow the link above to view."
                            if "https://twitter.com/" in ori and "/status/" in ori:
                                if "/video/" in ori:
                                    print("")
                                else:
                                    if "/photo/" in ori:
                                        print("")
                                    else:
                                        url = ori
                                        url = url.replace(
                                            "twitter.com",
                                            "mobile.twitter.com")
                                        pattern = re.compile(
                                            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                                        )
                                        url = re.findall(pattern, url)
                                        url = url[0]
                                        ori_tweet = requests.get(url)
                                        ori_html = BeautifulSoup(
                                            ori_tweet.text, "html.parser")
                                        ori_post = ori_html.select(
                                            "div.tweet-text")[0]
                                        ori_auth = ori_html.select(
                                            "div.fullname")[0]
                                        ori_post = str(ori_post.get_text())
                                        ori_auth = str(ori_auth.get_text())
                                        ori_post = ori_post.replace("\n", "")
                                        ori_auth = ori_auth.replace("\n", "")
                                        tweet_text = "Retweeted and replied to " + ori_auth + "'s tweet\n(" + ori_post + ")\nAbove is original post\n" + tweet_text
                                        tweet_text = str(tweet_text).replace(
                                            str(ori), "")

                    if tweet_body[count].select("span.context"):
                        tweet_context = tweet_body[count].select(
                            "span.context")[0]
                        tweet_context = tweet_context.get_text()
                        ori_author = tweet_body[count].select(
                            "strong.fullname")[0]
                        ori_author = str(ori_author.get_text())
                        tweet_text = "Retweeted " + ori_author + "'s tweet: \n" + tweet_text
                    if tweet_body[count].select("div.tweet-reply-context"):
                        re_context = tweet_body[count].select(
                            "div.tweet-reply-context")[0]
                        re_context = str(re_context.get_text())
                        re_context = re_context.replace("\n", "")
                        if ori:
                            url = ori
                            url = url.replace("twitter.com",
                                              "mobile.twitter.com")
                            pattern = re.compile(
                                r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
                            )
                            url = re.findall(pattern, url)
                            url = url[0]
                            ori_tweet = requests.get(url)
                            ori_html = BeautifulSoup(ori_tweet.text,
                                                     "html.parser")
                            ori_post = ori_html.select("div.tweet-text")[0]
                            ori_auth = ori_html.select("div.fullname")[0]
                            ori_post = str(ori_post.get_text())
                            ori_auth = str(ori_auth.get_text())
                            ori_post = ori_post.replace("\n", "")
                            ori_auth = ori_auth.replace("\n", "")
                            tweet_text = str(tweet_text).replace(str(ori), "")
                            tweet_text = re_context + "\n(" + ori_post + ")\nAbove is original post\n" + tweet_text
                        else:
                            tweet_text = re_context + "\n" + tweet_text
                    ori = None

                    count += 1
                    if helpers._config(
                            "TT_MODE") == "many-to-one" or helpers._config(
                                "TT_MODE") == "many-to-many":
                        author = html.select("table.profile-details"
                                             )[0].select("div.fullname")[0]
                        user = str(author.get_text())
                        user = user.replace("\n", "")
                        tweet_text = user + ' said: \n' + tweet_text
                    tweets[tweet_time] = {
                        "id": tweet_id,
                        "text": tweet_text,
                        "img": tweet_img
                    }

            except Exception as e:

                logger.error("get_tweets() => An error occurred.")
                logger.error(e)

                continue

        return ({k: tweets[k]
                 for k in sorted(tweets, reverse=True)}
                if len(tweets) > 0 else None)