Example 1
import json
from datetime import datetime as dt

import osometweet


def stream_tweets(bearer_token):
    """
    Write a filtered stream of tweets directly to a newline-delimited
    JSON file, named with today's date in "%Y-%m-%d_%H-%M" format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")

    oauth2 = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=False
    )
    ot = osometweet.OsomeTweet(oauth2)

    # Add all tweet fields
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Add streaming rules
    rules = [{"value": "coronavirus", "tag": "all coronavirus tweets"},
             {"value": "indiana", "tag": "all indiana tweets"}]
    add_rules = {"add": rules}
    response = ot.set_filtered_stream_rule(rules=add_rules)
    print(f"API response from adding two rules:\n{response}\n")

    # Retrieve active streaming rules
    current_rules = ot.get_filtered_stream_rule()
    print(f'The current filtered stream rules are:\n{current_rules}\n')

    # Remove a streaming rule by using its tag
    indiana_rule = [
        rule["id"] for rule in current_rules["data"]
        if 'all indiana tweets' in rule["tag"]
    ]
    delete_rule = {'delete': {'ids': indiana_rule}}
    response = ot.set_filtered_stream_rule(rules=delete_rule)
    print(f"API response from deleting one rule:\n{response}\n")

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open a file for the streamed tweets
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # stream is a Generator
        stream = ot.filtered_stream(fields=all_tweet_fields)
        # We have to iterate over the stream to fetch streamed tweets
        for tweet in stream.iter_lines():
            # Parse the line, skipping keep-alive signals that aren't JSON
            try:
                data = json.loads(tweet).get("data")

                # When data is found, we write it to the open file
                if data:
                    json.dump(data, data_file)
                    data_file.write("\n")
            except json.JSONDecodeError:
                pass
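
A minimal way to run this function is to read the bearer token from an environment variable and call it directly; a sketch, assuming the variable name (which is not part of the original script):

import os

if __name__ == "__main__":
    # "TWITTER_BEARER_TOKEN" is an assumed name; use whatever variable
    # your token is actually stored under.
    stream_tweets(os.environ["TWITTER_BEARER_TOKEN"])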
Example 2
import osometweet


def initialize_osometweet(bearer_token):
    """
    Return an authorized osometweet API object
    from which we can make API calls.

    Parameters
    ----------
    - bearer_token (str) : Your secret Twitter bearer token.
    """
    print("Initializing osometweet...")

    oauth2 = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=True  # Wait if Twitter sends rate limit message
    )
    return osometweet.OsomeTweet(oauth2)
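
A hedged usage sketch: once initialized, any osometweet endpoint method can be called on the returned object. The username below is only a placeholder:

ot = initialize_osometweet(bearer_token)  # bearer_token defined elsewhere
# Look up a single (placeholder) account, requesting all user fields
response = ot.user_lookup_usernames(
    usernames=["TwitterDev"],
    fields=osometweet.UserFields(everything=True)
)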
Example 3
import json
from datetime import datetime as dt

import osometweet


def gather_data(bearer_token, chunked_list):
    """
    Gather tweets based on the chunked list of tweet IDs with the provided
    bearer_token.
    """
    print("Gathering Data...")

    oauth2 = osometweet.OAuth2(bearer_token=bearer_token)
    ot = osometweet.OsomeTweet(oauth2)

    # Add all tweet fields
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open two files. One for good data, the other for tweet errors.
    with open(f"tweet_data--{today}.json", 'w') as data_file,\
         open(f"tweet_errors--{today}.json", 'w') as error_file:

        # Iterate through the list of lists
        for one_hundred_tweets in chunked_list:
            response = ot.tweet_lookup(tids=one_hundred_tweets,
                                       fields=all_tweet_fields)

            # Get data and errors
            data = response["data"]
            errors = response["errors"]

            # `data` and `errors` are always present in the response, but
            # either may be `None` (i.e. no data/errors), in which case
            # iterating over it below raises a TypeError.
            try:
                data_file.writelines(f"{json.dumps(line)}\n" for line in data)
            except TypeError:
                print("No data found in this set of tweets, "
                      "skipping to the next set.")

            try:
                error_file.writelines(f"{json.dumps(line)}\n"
                                      for line in errors)
            except TypeError:
                print("No problematic tweets found in this set of tweets, "
                      "skipping to the next set.")
Example 4
import json
from datetime import datetime as dt

import osometweet


def stream_tweets(bearer_token):
    """
    Stream a 1% sample of tweets from Twitter and write them directly to a
    newline-delimited JSON file, named with today's date in "%Y-%m-%d_%H-%M"
    format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")

    oauth2 = osometweet.OAuth2(bearer_token=bearer_token,
                               manage_rate_limits=False)
    ot = osometweet.OsomeTweet(oauth2)

    # Add all tweet fields
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open a file for the streamed tweets
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # stream is a Generator
        stream = ot.sampled_stream(fields=all_tweet_fields)
        # We have to iterate over the stream to fetch streamed tweets
        for tweet in stream.iter_lines():
            # Parse the line, skipping keep-alive signals that aren't JSON
            try:
                data = json.loads(tweet).get("data")

                # When data is found, we write it to the open file
                if data:
                    json.dump(data, data_file)
                    data_file.write("\n")
            except json.JSONDecodeError:
                pass
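
Because each tweet is written as one JSON object per line, the resulting file can be read back with plain line-by-line parsing; a sketch (the filename shown is just an example of what the function produces):

import json

tweets = []
with open("tweet_stream--2022-01-01_00-00.json") as f:  # example filename
    for line in f:
        tweets.append(json.loads(line))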
Example 5
# setUp method from a unittest.TestCase; `bearer_token` is assumed to be
# defined at module level.
def setUp(self):
    oauth2 = osometweet.OAuth2(bearer_token=bearer_token)
    self.ot = osometweet.OsomeTweet(oauth2)

Example 6
import json
from datetime import datetime as dt

import osometweet


def gather_data(
        access_token,
        access_token_secret,
        api_key,
        api_key_secret,
        chunked_user_list
):
    """
    Gather user info based on the chunked list of usernames with the
    provided OAuth1.0a user credentials.
    """
    print("Gathering Data...")

    oauth1a = osometweet.OAuth1a(
        api_key=api_key,
        api_key_secret=api_key_secret,
        access_token=access_token,
        access_token_secret=access_token_secret
    )
    ot = osometweet.OsomeTweet(oauth1a)

    # Add all user_fields
    all_user_fields = osometweet.UserFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open two files. One for good data, the other for account errors.
    with open(f"account_data--{today}.json", 'w') as data_file,\
         open(f"account_errors--{today}.json", 'w') as error_file:

        # Iterate through the list of lists
        for one_hundred_users in chunked_user_list:
            response = ot.user_lookup_usernames(
                usernames=one_hundred_users,
                fields=all_user_fields
            )

            # Whereas the user_ids endpoint always returns both "data" and
            # "errors", the usernames endpoint includes each key only when
            # it has content.
            if "data" in response:
                data = response["data"]
            else:
                data = None

            if "errors" in response:
                errors = response["errors"]
            else:
                errors = None

            try:
                data_file.writelines(f"{json.dumps(line)}\n" for line in data)
            except TypeError:
                print(
                    "No USER data found in this set of users, "
                    "skipping to the next set."
                )

            try:
                error_file.writelines(
                    f"{json.dumps(line)}\n" for line in errors
                )
            except TypeError:
                print(
                    "No problematic users found in this set of user, "
                    "skipping to the next set."
                )
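
A sketch of calling this function with the four OAuth1.0a credentials read from environment variables (the variable names are assumptions) and a pre-chunked username list:

import os

# Each inner list may hold at most 100 usernames
chunked_user_list = [["TwitterDev", "jack"]]

gather_data(
    access_token=os.environ["TWITTER_ACCESS_TOKEN"],
    access_token_secret=os.environ["TWITTER_ACCESS_TOKEN_SECRET"],
    api_key=os.environ["TWITTER_API_KEY"],
    api_key_secret=os.environ["TWITTER_API_KEY_SECRET"],
    chunked_user_list=chunked_user_list,
)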
Example 7
import json
from datetime import datetime as dt

import osometweet


def gather_data(
        access_token,
        access_token_secret,
        api_key,
        api_key_secret,
        chunked_user_list
):
    """
    Gather user info based on the chunked list of user IDs with the
    provided OAuth1.0a user credentials.
    """
    print("Gathering Data...")

    oauth1a = osometweet.OAuth1a(
        api_key=api_key,
        api_key_secret=api_key_secret,
        access_token=access_token,
        access_token_secret=access_token_secret
    )
    ot = osometweet.OsomeTweet(oauth1a)

    # Add all user_fields
    all_user_fields = osometweet.UserFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open two files. One for good data, the other for account errors.
    with open(f"account_data--{today}.json", 'w') as data_file,\
         open(f"account_errors--{today}.json", 'w') as error_file:

        # Iterate through the list of lists
        for one_hundred_users in chunked_user_list:
            response = ot.user_lookup_ids(
                user_ids=one_hundred_users,
                fields=all_user_fields
            )

            # Get data and errors
            data = response["data"]
            errors = response["errors"]

            # `data` and `errors` are always present in the response, but
            # either may be `None` (i.e. no data/errors), in which case
            # iterating over it below raises a TypeError.
            try:
                data_file.writelines(f"{json.dumps(line)}\n" for line in data)
            except TypeError:
                print(
                    "No USER data found in this set of users, "
                    "skipping to the next set."
                )

            try:
                error_file.writelines(
                    f"{json.dumps(line)}\n" for line in errors
                )
            except TypeError:
                print(
                    "No problematic users found in this set of user, "
                    "skipping to the next set."
                )