def get_klout_topics(tweet, topic_type='influence'):
    """
    Collect the Klout topics attached to the Tweet's author.

    Both payload formats are normalised to the same shape: every returned
    dict has exactly the keys ``url``, ``id``, ``name`` and ``score``.

    Args:
        tweet (Tweet or dict): the Tweet to inspect
        topic_type (str): which kind of topic to return (default 'influence')

    Returns:
        list or None: one dict per topic, or None when the keys leading to
        the topic data are missing from the payload
    """
    try:
        # walking the nested keys doubles as the existence check
        if is_original_format(tweet):
            klout = tweet['user']['derived']['klout']
            raw_topics = klout['{}_topics'.format(topic_type)]
        else:
            raw_topics = tweet['gnip']['klout_profile']['topics']
    except KeyError:
        return None

    if is_original_format(tweet):
        # original format already uses the target key names; the dicts are
        # rebuilt anyway so both branches return the same structure
        return [{'url': entry['url'],
                 'id': entry['id'],
                 'name': entry['name'],
                 'score': entry['score']}
                for entry in raw_topics]

    # activity-streams keeps every topic type in one list: filter first,
    # then rename 'link' -> 'url' and 'displayName' -> 'name'
    matching = [entry for entry in raw_topics
                if entry['topic_type'] == topic_type]
    return [{'url': entry['link'],
             'id': entry['id'],
             'name': entry['displayName'],
             'score': entry['score']}
            for entry in matching]
def get_in_reply_to_user_id(tweet):
    """
    Get the user id of the user whose Tweet is being replied to.

    Note that this is unavailable in activity-streams format.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the value stored under "in_reply_to_user_id_str", i.e. the id
        of the user being replied to (None when the Tweet is not a reply)

    Raises:
        NotAvailableError: if the Tweet is in activity-streams format

    Example:
        >>> from tweet_parser.getter_methods.tweet_reply import *
        >>> original_format_dict = {
        ...     "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...     "in_reply_to_user_id_str": "2382763597"
        ... }
        >>> get_in_reply_to_user_id(original_format_dict)
        '2382763597'
    """
    if not is_original_format(tweet):
        raise NotAvailableError("Gnip activity-streams format does not"
                                " return the replied to user's id")
    return tweet["in_reply_to_user_id_str"]
def get_favorite_count(tweet):
    """
    Look up how many times the Tweet has been favorited.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        int: the favorite count, or 0 when the payload has no such field

    Example:
        >>> from tweet_parser.getter_methods.tweet_counts import get_favorite_count
        >>> tweet = {'created_at': '2017-21-23T15:21:21.000Z',
        ...          'id_str': '2382763597',
        ...          'favorite_count': 2}
        >>> get_favorite_count(tweet)
        2
        >>> activity_streams_tweet = {'postedTime': '2017-05-24T20:17:19.000Z',
        ...                           'favoritesCount': 3}
        >>> get_favorite_count(activity_streams_tweet)
        3
    """
    # only the key name differs between the two payload formats
    count_key = "favorite_count" if is_original_format(tweet) else "favoritesCount"
    return tweet.get(count_key, 0)
def get_media_entities(tweet):
    """
    Return every media entity attached to a Tweet.

    The media live under "extended_entities" (original format) or
    "twitter_extended_entities" (activity-streams). This is not the same as
    the first media entity from the basic `entities` key; the extended field
    is needed to get *all* of the media contained in a Tweet. Useful as an
    entry point for other functions or for custom parsing.

    Args:
        tweet (Tweet or dict): the tweet in question

    Returns:
        list: the dicts holding each media item's metadata; an empty list
        when the Tweet has no extended entities or no "media" entry

    Example:
        >>> from tweet_parser.getter_methods.tweet_entities import get_media_entities
        >>> tweet = {'created_at': '2017-21-23T15:21:21.000Z',
        ...          'extended_entities': {'media': [
        ...              {'id': 4242,
        ...               'id_str': '4242',
        ...               'type': 'photo',
        ...               'url': 'https://t.co/something'},
        ...              {'id': 4243,
        ...               'id_str': '4243',
        ...               'type': 'photo',
        ...               'url': 'https://t.co/something_else'}]}
        ...          }
        >>> get_media_entities(tweet)
        [{'id': 4242, 'id_str': '4242', 'type': 'photo', 'url': 'https://t.co/something'}, {'id': 4243, 'id_str': '4243', 'type': 'photo', 'url': 'https://t.co/something_else'}]
    """
    if is_original_format(tweet):
        container_key = "extended_entities"
    else:
        container_key = "twitter_extended_entities"

    container = tweet.get(container_key)
    if not container:
        # field missing, None or empty: no media to report
        return []
    return container.get("media", [])
def get_user_id(tweet):
    """
    Return the Twitter ID of the account that posted the Tweet.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the Twitter ID of the user who posted the Tweet

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_user_id
        >>> original_format_dict = {
        ...     "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...     "user":
        ...         {"id_str": "815279070241955840"}
        ... }
        >>> get_user_id(original_format_dict)
        '815279070241955840'
        >>> activity_streams_format_dict = {
        ...     "postedTime": "2017-05-24T20:17:19.000Z",
        ...     "actor":
        ...         {"id": "id:twitter.com:815279070241955840"}
        ... }
        >>> get_user_id(activity_streams_format_dict)
        '815279070241955840'
    """
    if not is_original_format(tweet):
        # activity-streams ids look like "id:twitter.com:<id>"; keep the tail
        return tweet["actor"]["id"].rsplit(":", 1)[-1]
    return tweet["user"]["id_str"]
def get_name(tweet):
    """
    Return the display name of the account that posted the Tweet.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the display name of the user who posted the Tweet

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_name
        >>> original_format_dict = {
        ...     "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...     "user":
        ...         {"name": "jk no"}
        ... }
        >>> get_name(original_format_dict)
        'jk no'
        >>> activity_streams_format_dict = {
        ...     "postedTime": "2017-05-24T20:17:19.000Z",
        ...     "actor":
        ...         {"displayName": "jk no"}
        ... }
        >>> get_name(activity_streams_format_dict)
        'jk no'
    """
    if is_original_format(tweet):
        profile, name_key = tweet["user"], "name"
    else:
        profile, name_key = tweet["actor"], "displayName"
    return profile[name_key]
def get_screen_name(tweet):
    """
    Return the screen name (@ handle) of the account that posted the Tweet.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the @ handle of the user who posted the Tweet

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_screen_name
        >>> original_format_dict = {
        ...     "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...     "user":
        ...         {"screen_name": "RobotPrincessFi"}
        ... }
        >>> get_screen_name(original_format_dict)
        'RobotPrincessFi'
        >>> activity_streams_format_dict = {
        ...     "postedTime": "2017-05-24T20:17:19.000Z",
        ...     "actor":
        ...         {"preferredUsername": "RobotPrincessFi"}
        ... }
        >>> get_screen_name(activity_streams_format_dict)
        'RobotPrincessFi'
    """
    return (tweet["user"]["screen_name"]
            if is_original_format(tweet)
            else tweet["actor"]["preferredUsername"])
def get_quote_count(tweet):
    """
    Look up how many times the Tweet has been quoted.

    Note that this is unavailable in activity-streams format.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        int: the quote count, or 0 when the payload has no "quote_count"

    Raises:
        NotAvailableError: if the Tweet is in activity-streams format

    Example:
        >>> from tweet_parser.getter_methods.tweet_counts import get_quote_count
        >>> tweet = {'created_at': '2017-21-23T15:21:21.000Z',
        ...          'id_str': '2382763597',
        ...          'quote_count': 2}
        >>> get_quote_count(tweet)
        2
    """
    if not is_original_format(tweet):
        raise NotAvailableError(
            "Quote counts are only available in original format")
    return tweet.get("quote_count", 0)
def get_tweet_type(tweet):
    """
    Classify a Tweet as one of three types: tweet, quote, or retweet.

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: (one of 3 strings)
        "tweet": an original Tweet
        "retweet": a native retweet (created with the retweet button)
        "quote": a native quote tweet (retweet button + adding quote text)

    Caveats:
        When a quote-tweet (tweet A) is quote-tweeted (tweet B), the
        innermost quoted tweet (A) in the payload (for B) no longer has the
        key "quoted_status" or "twitter_quoted_status", and that tweet (A)
        would be labeled as a "tweet" (not a "quote").
    """
    if is_original_format(tweet):
        # the retweet marker is checked before the quote marker
        if "retweeted_status" in tweet:
            return "retweet"
        return "quote" if "quoted_status" in tweet else "tweet"

    # activity-streams: a retweet is an activity whose verb is "share"
    if tweet["verb"] == "share":
        return "retweet"
    return "quote" if "twitter_quoted_status" in tweet else "tweet"
def get_text(tweet):
    """
    Return the raw text field of a Tweet: "text" (original format, falling
    back to "full_text" when "text" is absent) or "body" (activity-streams).

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: the contents of the "text" key (original format) or "body" key
        (activity streams format)

    Example:
        >>> from tweet_parser.getter_methods.tweet_text import get_text
        >>> original = {
        ...     "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...     "text": "some tweet text"}
        >>> get_text(original)
        'some tweet text'
        >>> activity = {"postedTime": "2017-05-24T20:17:19.000Z",
        ...             "body": "some tweet text"}
        >>> get_text(activity)
        'some tweet text'
    """
    if not is_original_format(tweet):
        return tweet["body"]
    text_key = "text" if "text" in tweet else "full_text"
    return tweet[text_key]
def get_lang(tweet):
    """
    Get the language that the Tweet is written in.

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: 2-letter BCP 47 language code, or None if the language is
        undefined (field missing, set to None, or set to "und")

    Example:
        >>> from tweet_parser.getter_methods.tweet_text import get_lang
        >>> original = {"created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "lang": "en"}
        >>> get_lang(original)
        'en'
        >>> activity = {"postedTime": "2017-05-24T20:17:19.000Z",
        ...             "twitter_lang": "en"}
        >>> get_lang(activity)
        'en'
    """
    lang_field = "lang" if is_original_format(tweet) else "twitter_lang"
    # .get() so a payload without the field yields None, as documented,
    # instead of raising KeyError
    lang = tweet.get(lang_field)
    if lang is None or lang == "und":
        return None
    return lang
def get_profile_location(tweet):
    """
    Get user's derived location data from the profile location enrichment.
    If unavailable, returns None.

    Original-format payloads return the first entry of
    tweet["user"]["derived"]["locations"] unchanged. Activity-streams
    payloads are rebuilt from tweet["gnip"]["profileLocations"][0] into a
    dict that may contain the keys "country", "country_code", "locality",
    "region", "sub_region", "full_name" and "geo"; a key is only included
    when the payload holds a non-None value for it.

    Args:
        tweet (Tweet or dict): Tweet object or dictionary

    Returns:
        dict: the profile location, or None when the enrichment is missing
        or its list of locations is empty. More information on the profile
        locations enrichment here:
        http://support.gnip.com/enrichments/profile_geo.html

    Example:
        >>> result = {"country": "US",         # Two letter ISO-3166 country code
        ...           "locality": "Boulder",   # The locality location (~ city)
        ...           "region": "Colorado",    # The region location (~ state/province)
        ...           "sub_region": "Boulder", # The sub-region location (~ county)
        ...           "full_name": "Boulder, Colorado, US", # The full name (excluding sub-region)
        ...           "geo": [40,-105]         # lat/long value that corresponds to
        ...                                    # the lowest granularity location for where the user
        ...                                    # who created the Tweet is from
        ...  }

    Caveats:
        This only returns the first element of the 'locations' list.
        I'm honestly not sure what circumstances would result in a list that
        is more than one element long.
    """
    if is_original_format(tweet):
        try:
            return tweet["user"]["derived"]["locations"][0]
        except (KeyError, IndexError):
            # IndexError: the enrichment is present but has no locations
            return None

    try:
        location = tweet["gnip"]["profileLocations"][0]
        address = location["address"]
    except (KeyError, IndexError):
        return None

    # (activity-streams key, original-format key) pairs, in output order
    address_fields = (
        ("country", "country"),
        ("countryCode", "country_code"),
        ("locality", "locality"),
        ("region", "region"),
        ("subRegion", "sub_region"),
    )
    location_fields = (
        ("displayName", "full_name"),
        ("geo", "geo"),
    )

    reconstructed_original_format = {}
    for container, fields in ((address, address_fields),
                              (location, location_fields)):
        for source_key, target_key in fields:
            value = container.get(source_key)
            if value is not None:
                reconstructed_original_format[target_key] = value
    return reconstructed_original_format
def get_following_count(tweet):
    """
    Return how many accounts the Tweet's author is following.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        int: the number of accounts that the user is following

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_following_count
        >>> original_format_dict = {
        ...     "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...     "user":
        ...         {"friends_count": 2}
        ... }
        >>> get_following_count(original_format_dict)
        2
        >>> activity_streams_format_dict = {
        ...     "postedTime": "2017-05-24T20:17:19.000Z",
        ...     "actor":
        ...         {"friendsCount": 2}
        ... }
        >>> get_following_count(activity_streams_format_dict)
        2
    """
    if not is_original_format(tweet):
        return tweet["actor"]["friendsCount"]
    return tweet["user"]["friends_count"]
def get_user_id(tweet):
    """
    Return the posting user's id as a string.

    Original format reads tweet["user"]["id_str"]; activity-streams takes
    the text after the last ":" of tweet["actor"]["id"].
    """
    if is_original_format(tweet):
        user_id = tweet["user"]["id_str"]
    else:
        # rpartition yields the whole string when no ":" is present
        user_id = tweet["actor"]["id"].rpartition(":")[2]
    return user_id
def id(self):
    """
    Return the Tweet id as a string.

    Original format reads self["id_str"]; activity-streams takes the text
    after the last ":" of self["id"].
    """
    if not tweet_checking.is_original_format(self):
        id_segments = self["id"].split(":")
        return id_segments[-1]
    return self["id_str"]
def get_screen_name(tweet):
    """
    Return the posting user's screen name (@ handle).

    Read from tweet["user"]["screen_name"] (original format) or
    tweet["actor"]["preferredUsername"] (activity-streams).
    """
    if is_original_format(tweet):
        profile, handle_key = tweet["user"], "screen_name"
    else:
        profile, handle_key = tweet["actor"], "preferredUsername"
    return profile[handle_key]
def get_text(tweet):
    """
    Return the contents of 'text' (original format) or 'body'
    (activity-streams), exactly as stored in the payload.
    """
    text_key = "text" if is_original_format(tweet) else "body"
    return tweet[text_key]
def get_quote_or_rt_text(tweet):
    """
    Return the text of the Tweet that was quoted or retweeted.

    Reads the type from ``tweet.tweet_type``: a plain "tweet" gives an empty
    string; "quote" and "retweet" give the full text of the embedded Tweet;
    any other type gives None.
    """
    kind = tweet.tweet_type
    if kind == "tweet":
        return ""

    # pick the key that holds the embedded Tweet for this type and format
    if kind == "quote":
        embedded_key = ("quoted_status" if is_original_format(tweet)
                        else "twitter_quoted_status")
    elif kind == "retweet":
        embedded_key = ("retweeted_status" if is_original_format(tweet)
                        else "object")
    else:
        return None
    return get_full_text(tweet[embedded_key])
def get_name(tweet):
    """
    Return the posting user's display name.

    Read from tweet["user"]["name"] (original format) or
    tweet["actor"]["displayName"] (activity-streams).
    """
    return (tweet["user"]["name"]
            if is_original_format(tweet)
            else tweet["actor"]["displayName"])
def get_quote_or_rt_text(tweet):
    """
    Get the quoted or retweeted text in a Tweet
    (this is not the text entered by the posting user)

    - tweet: empty string (there is no quoted or retweeted text)
    - quote: only the text of the quoted Tweet
    - retweet: the text of the retweet

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: text of the retweeted-tweet or the quoted-tweet
        (empty string if this is an original Tweet)

    Example:
        >>> from tweet_parser.getter_methods.tweet_text import get_quote_or_rt_text
        >>> # a quote tweet
        >>> quote = {"created_at": "Wed May 24 20:17:19 +0000 2017",
        ...          "text": "adding my own commentary",
        ...          "truncated": False,
        ...          "quoted_status": {
        ...                 "created_at": "Mon May 01 05:00:05 +0000 2017",
        ...                 "truncated": False,
        ...                 "text": "an interesting Tweet"
        ...            }
        ...         }
        >>> get_quote_or_rt_text(quote)
        'an interesting Tweet'
    """
    tweet_type = get_tweet_type(tweet)
    if tweet_type == "tweet":
        return ""

    # tweet type -> (original-format key, activity-streams key)
    embedded_keys = {
        "quote": ("quoted_status", "twitter_quoted_status"),
        "retweet": ("retweeted_status", "object"),
    }
    if tweet_type not in embedded_keys:
        return None
    original_key, activity_key = embedded_keys[tweet_type]
    chosen_key = original_key if is_original_format(tweet) else activity_key
    return get_full_text(tweet[chosen_key])
def get_retweet(tweet):
    """
    Return the retweeted Tweet's dict, or None when ``tweet.tweet_type`` is
    not "retweet".

    The embedded Tweet is stored under "retweeted_status" (original format)
    or "object" (activity-streams).
    """
    if tweet.tweet_type != "retweet":
        return None
    embedded_key = "retweeted_status" if is_original_format(tweet) else "object"
    return tweet[embedded_key]
def get_all_text(tweet):
    """
    Return all of the text of the Tweet as one newline-separated string.

    Joins ``tweet.user_entered_text`` and ``tweet.quote_or_rt_text`` and,
    for original-format Tweets only, the newline-joined
    ``tweet.poll_options``. Empty pieces are left out of the result.
    """
    with_polls = is_original_format(tweet)
    pieces = [tweet.user_entered_text, tweet.quote_or_rt_text]
    if with_polls:
        pieces.append("\n".join(tweet.poll_options))
    # drop falsy pieces so no blank lines are produced
    return "\n".join(piece for piece in pieces if piece)
def get_user_mentions(tweet):
    """
    Return the list of @ mention dicts found in the Tweet.

    Mentions are read from tweet["entities"]["user_mentions"] (original
    format) or tweet["twitter_entities"]["user_mentions"]
    (activity-streams); a None value is reported as an empty list.
    """
    entities_key = "entities" if is_original_format(tweet) else "twitter_entities"
    mentions = tweet[entities_key]["user_mentions"]
    return [] if mentions is None else mentions
def get_klout_profile(tweet):
    """
    Return the user's Klout profile URL (a str), or None when the keys
    leading to it are missing from the payload.
    """
    try:
        if is_original_format(tweet):
            return tweet['user']['derived']['klout']['profile_url']
        return tweet['gnip']['klout_profile']['link']
    except KeyError:
        return None
def get_klout_score(tweet):
    """
    Return the user's Klout score (an int), or None when the keys leading
    to it are missing from the payload.
    """
    try:
        if is_original_format(tweet):
            key_path = ('user', 'derived', 'klout', 'score')
        else:
            key_path = ('gnip', 'klout_score')
        # walk down the nested payload one key at a time
        node = tweet
        for key in key_path:
            node = node[key]
    except KeyError:
        return None
    return node
def get_klout_id(tweet):
    """
    Return the user's Klout id (a str), or None when the keys leading to
    it are missing from the payload.
    """
    try:
        if is_original_format(tweet):
            klout_section, id_key = tweet['user']['derived']['klout'], 'user_id'
        else:
            klout_section, id_key = tweet['gnip']['klout_profile'], 'klout_user_id'
        found = klout_section[id_key]
    except KeyError:
        return None
    else:
        return found
def get_quote_tweet(tweet):
    """
    Return the quoted Tweet's dict, or None when ``tweet.tweet_type`` is
    not "quote".

    The embedded Tweet is stored under "quoted_status" (original format)
    or "twitter_quoted_status" (activity-streams).
    """
    if tweet.tweet_type != "quote":
        return None
    if is_original_format(tweet):
        quoted_key = "quoted_status"
    else:
        quoted_key = "twitter_quoted_status"
    return tweet[quoted_key]
def get_quoted_mentions(tweet):
    """
    Return the @ mention dicts of the Tweet being quoted.

    Users mentioned in the quoted Tweet are not part of the quoting Tweet's
    own mentions, so this getter exposes them separately. Returns an empty
    list when ``tweet.tweet_type`` is not "quote".
    """
    if tweet.tweet_type != "quote":
        return []
    quoted_key = ("quoted_status" if is_original_format(tweet)
                  else "twitter_quoted_status")
    return get_user_mentions(tweet[quoted_key])
def get_hashtags(tweet):
    """
    Get a list of the hashtags used in the Tweet.

    Hashtag entities are read from tweet["entities"]["hashtags"] (original
    format) or tweet["twitter_entities"]["hashtags"] (activity-streams).

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        list: the "text" value of each hashtag entity; an empty list when
        the hashtags entry is None
    """
    if is_original_format(tweet):
        entities = "entities"
    else:
        entities = "twitter_entities"
    # guard on the field that is iterated (the previous check looked at
    # "user_mentions", which says nothing about whether hashtags exist)
    hashtags = tweet[entities]["hashtags"]
    if hashtags is None:
        return []
    return [hashtag["text"] for hashtag in hashtags]
def get_full_text(tweet):
    """
    Return the full text of a Tweet dict, or of the sub-dict embedded in a
    quote/RT.

    Original format: tweet["extended_tweet"]["full_text"] when
    tweet["truncated"] is truthy, otherwise tweet["text"].
    Activity-streams: tweet["long_object"]["body"] when "long_object" is
    present, otherwise tweet["body"].
    """
    if is_original_format(tweet):
        is_truncated = tweet["truncated"]
        return tweet["extended_tweet"]["full_text"] if is_truncated else tweet["text"]
    # activity-streams keeps the untruncated text under "long_object"
    body_holder = tweet["long_object"] if "long_object" in tweet else tweet
    return body_holder["body"]