Beispiel #1
0
def get_klout_topics(tweet, topic_type='influence'):
    """
    Collect the user's Klout topics of the requested type.

    Both payload formats are normalised so that every returned dict has the
    same keys: url, id, name, score.

    Args:
        tweet (Tweet or dict): the tweet payload to inspect
        topic_type (str): which topic group to select (default 'influence')

    Returns:
        list or None: a list of topic dicts, or None when the Klout topic
        path is missing from the payload
    """
    try:
        # Only the path lookup is guarded; a missing path means "no topics".
        original = is_original_format(tweet)
        if original:
            klout = tweet['user']['derived']['klout']
            raw_topics = klout['{}_topics'.format(topic_type)]
        else:
            raw_topics = tweet['gnip']['klout_profile']['topics']
    except KeyError:
        return None

    if original:
        # Original-format topics already use the output key names.
        url_key, name_key = 'url', 'name'
        selected = raw_topics
    else:
        # Activity-streams topics mix all types and use different key names.
        url_key, name_key = 'link', 'displayName'
        selected = [entry for entry in raw_topics
                    if entry['topic_type'] == topic_type]

    return [{'url': entry[url_key],
             'id': entry['id'],
             'name': entry[name_key],
             'score': entry['score']}
            for entry in selected]
Beispiel #2
0
def get_in_reply_to_user_id(tweet):
    """
    Get the user id of the user whose Tweet is being replied to.

    This value is only read from original-format payloads; for
    activity-streams payloads a NotAvailableError is raised.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the value stored under "in_reply_to_user_id_str"

    Raises:
        NotAvailableError: if the tweet is not in original format

    Example:
        >>> from tweet_parser.getter_methods.tweet_reply import *
        >>> original_format_dict = {
        ...             "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "in_reply_to_user_id_str": "2382763597"
        ...            }
        >>> get_in_reply_to_user_id(original_format_dict)
        '2382763597'
    """
    if not is_original_format(tweet):
        raise NotAvailableError("Gnip activity-streams format does not"
                                " return the replied to user's id")
    return tweet["in_reply_to_user_id_str"]
Beispiel #3
0
def get_favorite_count(tweet):
    """
    Return how many times this tweet has been favorited.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        int: the stored favorite count, or 0 when the count key is absent

    Example:
        >>> from tweet_parser.getter_methods.tweet_counts import get_favorite_count
        >>> tweet = {'created_at': '2017-21-23T15:21:21.000Z',
        ...          'id_str': '2382763597',
        ...          'favorite_count': 2}
        >>> get_favorite_count(tweet)
        2

        >>> activity_streams_tweet = {'postedTime': '2017-05-24T20:17:19.000Z',
        ...                           'favoritesCount': 3}
        >>> get_favorite_count(activity_streams_tweet)
        3
    """
    # The two payload formats differ only in the name of the count key.
    count_key = "favorite_count" if is_original_format(tweet) else "favoritesCount"
    return tweet.get(count_key, 0)
Beispiel #4
0
def get_media_entities(tweet):
    """
    Return every media entity attached to a tweet.

    The media list is read from "extended_entities" (original format) or
    "twitter_extended_entities" (activity-streams format). This is not the
    same as the media found under the basic `entities` key; the extended
    field is where *all* of a tweet's media are listed. Useful as an entry
    point for other functions or custom parsing.

    Args:
        tweet (Tweet or dict): the tweet in question

    Returns:
        list: the value stored under "media" (a list of dicts with each
        media's metadata), or an empty list when the extended entities
        field is missing or empty.

    Example:
        >>> from tweet_parser.getter_methods.tweet_entities import get_media_entities
        >>> tweet = {'created_at': '2017-21-23T15:21:21.000Z',
        ...          'extended_entities': {'media': [{'id': 4242,
        ...                                           'id_str': '4242',
        ...                                           'type': 'photo'}]}}
        >>> get_media_entities(tweet)
        [{'id': 4242, 'id_str': '4242', 'type': 'photo'}]

        >>> get_media_entities({'created_at': '2017-21-23T15:21:21.000Z'})
        []
    """
    if is_original_format(tweet):
        extended = tweet.get("extended_entities")
    else:
        extended = tweet.get("twitter_extended_entities")

    # A missing or empty extended-entities field means there is no media.
    if not extended:
        return []
    return extended.get("media", [])
Beispiel #5
0
def get_user_id(tweet):
    """
    Return the Twitter ID of the user who posted the Tweet.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the Twitter ID of the user who posted the Tweet

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_user_id
        >>> original_format_dict = {
        ...             "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "user":
        ...              {"id_str": "815279070241955840"}
        ...            }
        >>> get_user_id(original_format_dict)
        '815279070241955840'

        >>> activity_streams_format_dict = {
        ...             "postedTime": "2017-05-24T20:17:19.000Z",
        ...             "actor":
        ...              {"id": "id:twitter.com:815279070241955840"}
        ...             }
        >>> get_user_id(activity_streams_format_dict)
        '815279070241955840'
    """
    if not is_original_format(tweet):
        # Activity-streams ids are colon-separated; the id is the last part.
        actor_id = tweet["actor"]["id"]
        return actor_id.rpartition(":")[2]
    return tweet["user"]["id_str"]
Beispiel #6
0
def get_name(tweet):
    """
    Return the display name of the user who posted the Tweet.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the display name of the user who posted the Tweet

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_name
        >>> original_format_dict = {
        ...             "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "user":
        ...              {"name": "jk no"}
        ...            }
        >>> get_name(original_format_dict)
        'jk no'

        >>> activity_streams_format_dict = {
        ...             "postedTime": "2017-05-24T20:17:19.000Z",
        ...             "actor":
        ...              {"displayName": "jk no"}
        ...             }
        >>> get_name(activity_streams_format_dict)
        'jk no'
    """
    if is_original_format(tweet):
        profile, name_key = tweet["user"], "name"
    else:
        profile, name_key = tweet["actor"], "displayName"
    return profile[name_key]
Beispiel #7
0
def get_screen_name(tweet):
    """
    Return the screen name (@ handle) of the user who posted the Tweet.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        str: the @ handle of the user who posted the Tweet

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_screen_name
        >>> original_format_dict = {
        ...             "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "user":
        ...              {"screen_name": "RobotPrincessFi"}
        ...            }
        >>> get_screen_name(original_format_dict)
        'RobotPrincessFi'

        >>> activity_streams_format_dict = {
        ...             "postedTime": "2017-05-24T20:17:19.000Z",
        ...             "actor":
        ...              {"preferredUsername": "RobotPrincessFi"}
        ...             }
        >>> get_screen_name(activity_streams_format_dict)
        'RobotPrincessFi'
    """
    if not is_original_format(tweet):
        actor = tweet["actor"]
        return actor["preferredUsername"]
    user = tweet["user"]
    return user["screen_name"]
Beispiel #8
0
def get_quote_count(tweet):
    """
    Return how many times this tweet has been quoted.

    Quote counts are only read from original-format payloads; for
    activity-streams payloads a NotAvailableError is raised.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        int: the stored quote count, or 0 when "quote_count" is absent

    Raises:
        NotAvailableError: if the tweet is not in original format

    Example:
        >>> from tweet_parser.getter_methods.tweet_counts import get_quote_count
        >>> tweet = {'created_at': '2017-21-23T15:21:21.000Z',
        ...          'id_str': '2382763597',
        ...          'quote_count': 2}
        >>> get_quote_count(tweet)
        2
    """
    if not is_original_format(tweet):
        raise NotAvailableError(
            "Quote counts are only available in original format")
    return tweet.get("quote_count", 0)
Beispiel #9
0
def get_tweet_type(tweet):
    """
    Classify a Tweet as one of three types: tweet, quote, or retweet.

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: (one of 3 strings)
        "tweet": an original Tweet
        "retweet": a native retweet (created with the retweet button)
        "quote": a native quote tweet (retweet button + adding quote text)

    Caveats:
        When a quote-tweet (tweet A) is quote-tweeted (tweet B),
        the innermost quoted tweet (A) in the payload (for B)
        no longer has the key "quoted_status" or "twitter_quoted_status",
        and that tweet (A) would be labeled as a "tweet" (not a "quote").
    """
    if is_original_format(tweet):
        # Retweet takes precedence over quote when both keys are present.
        for marker_key, label in (("retweeted_status", "retweet"),
                                  ("quoted_status", "quote")):
            if marker_key in tweet:
                return label
        return "tweet"

    # Activity-streams: a "share" verb marks a retweet.
    if tweet["verb"] == "share":
        return "retweet"
    return "quote" if "twitter_quoted_status" in tweet else "tweet"
Beispiel #10
0
def get_text(tweet):
    """
    Return the raw text field of a Tweet.

    For original format this is "text", falling back to "full_text" when
    "text" is not present; for activity-streams format it is "body".

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: the contents of the "text"/"full_text" key (original format)
        or the "body" key (activity streams format)

    Example:
        >>> from tweet_parser.getter_methods.tweet_text import get_text
        >>> original = {
        ...             "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "text": "some tweet text"}
        >>> get_text(original)
        'some tweet text'

        >>> activity = {"postedTime": "2017-05-24T20:17:19.000Z",
        ...             "body": "some tweet text"}
        >>> get_text(activity)
        'some tweet text'
    """
    if not is_original_format(tweet):
        return tweet["body"]
    text_key = "text" if "text" in tweet else "full_text"
    return tweet[text_key]
Beispiel #11
0
def get_lang(tweet):
    """
    Return the language code recorded for the Tweet.

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: the stored language code, or None when the stored value is
        None or the placeholder "und"

    Example:
        >>> from tweet_parser.getter_methods.tweet_text import get_lang
        >>> original = {"created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "lang": "en"}
        >>> get_lang(original)
        'en'

        >>> activity = {"postedTime": "2017-05-24T20:17:19.000Z",
        ...             "twitter_lang": "en"}
        >>> get_lang(activity)
        'en'
    """
    lang_key = "lang" if is_original_format(tweet) else "twitter_lang"
    language = tweet[lang_key]
    # "und" is treated the same as no language at all.
    if language is None or language == "und":
        return None
    return language
Beispiel #12
0
def get_profile_location(tweet):
    """
    Get the user's derived location data from the profile location enrichment.
    If unavailable (missing keys or an empty locations list), returns None.

    For original format the first entry of tweet["user"]["derived"]["locations"]
    is returned as-is. For activity-streams format the first entry of
    tweet["gnip"]["profileLocations"] is rebuilt with original-format style
    keys; only fields that are present and not None are copied.

    Args:
        tweet (Tweet or dict): Tweet object or dictionary

    Returns:
        dict or None: the profile location. More information on the profile
        locations enrichment here:
        http://support.gnip.com/enrichments/profile_geo.html

    Example:
        Keys that the activity-streams reconstruction can produce
        (values shown are illustrative):

        >>> result = {"country": "United States",
        ...           "country_code": "US",
        ...           "locality": "Boulder",
        ...           "region": "Colorado",
        ...           "sub_region": "Boulder",
        ...           "full_name": "Boulder, Colorado, US",
        ...           "geo": [40, -105]
        ...  }

    Caveats:
        This only returns the first element of the locations list.
    """
    if is_original_format(tweet):
        try:
            return tweet["user"]["derived"]["locations"][0]
        except (KeyError, IndexError):
            # Missing enrichment or an empty locations list: nothing to return.
            return None

    try:
        location = tweet["gnip"]["profileLocations"][0]
        address = location["address"]
    except (KeyError, IndexError):
        return None

    # (activity-streams key, original-format key) pairs, in output order.
    address_fields = (("country", "country"),
                      ("countryCode", "country_code"),
                      ("locality", "locality"),
                      ("region", "region"),
                      ("subRegion", "sub_region"))
    location_fields = (("displayName", "full_name"),
                       ("geo", "geo"))

    reconstructed = {}
    for container, fields in ((address, address_fields),
                              (location, location_fields)):
        for source_key, target_key in fields:
            value = container.get(source_key)
            if value is not None:
                reconstructed[target_key] = value
    return reconstructed
Beispiel #13
0
def get_following_count(tweet):
    """
    Return the number of accounts that the posting user is following.

    Args:
        tweet (Tweet): A Tweet object (or a dictionary)

    Returns:
        int: the number of accounts that the user is following

    Example:
        >>> from tweet_parser.getter_methods.tweet_user import get_following_count
        >>> original_format_dict = {
        ...             "created_at": "Wed May 24 20:17:19 +0000 2017",
        ...             "user":
        ...              {"friends_count": 2}
        ...            }
        >>> get_following_count(original_format_dict)
        2

        >>> activity_streams_format_dict = {
        ...             "postedTime": "2017-05-24T20:17:19.000Z",
        ...             "actor":
        ...              {"friendsCount": 2}
        ...             }
        >>> get_following_count(activity_streams_format_dict)
        2
    """
    if is_original_format(tweet):
        profile_key, count_key = "user", "friends_count"
    else:
        profile_key, count_key = "actor", "friendsCount"
    return tweet[profile_key][count_key]
Beispiel #14
0
def get_user_id(tweet):
    """
    Return the posting user's id as a string.

    Original format reads tweet["user"]["id_str"]; activity-streams format
    takes the last colon-separated segment of tweet["actor"]["id"].
    """
    if not is_original_format(tweet):
        actor_id = tweet["actor"]["id"]
        return actor_id.rsplit(":", 1)[-1]
    return tweet["user"]["id_str"]
Beispiel #15
0
 def id(self):
     """
     Return the Tweet id as a string.

     Original format reads self["id_str"]; otherwise the id is the last
     colon-separated segment of self["id"].
     """
     if not tweet_checking.is_original_format(self):
         raw_id = self["id"]
         return raw_id.split(":")[-1]
     return self["id_str"]
Beispiel #16
0
def get_screen_name(tweet):
    """
    Return the posting user's screen name (@ handle).

    Reads tweet["user"]["screen_name"] for original format and
    tweet["actor"]["preferredUsername"] otherwise.
    """
    if is_original_format(tweet):
        profile_key, handle_key = "user", "screen_name"
    else:
        profile_key, handle_key = "actor", "preferredUsername"
    return tweet[profile_key][handle_key]
Beispiel #17
0
def get_text(tweet):
    """
    Return the raw contents of "text" (original format) or "body"
    (activity-streams format), without any further processing.
    """
    text_key = "text" if is_original_format(tweet) else "body"
    return tweet[text_key]
Beispiel #18
0
def get_quote_or_rt_text(tweet):
    """
    Return the text of the quoted or retweeted Tweet embedded in `tweet`.

    Uses `tweet.tweet_type` to decide which embedded payload to read:
    an empty string for a plain tweet, the full text of the quoted status
    for a quote, and the full text of the retweeted status for a retweet.
    Any other tweet type yields None.
    """
    kind = tweet.tweet_type
    if kind == "tweet":
        return ""

    # Pick the (original-format, activity-streams) keys of the embedded tweet.
    if kind == "quote":
        original_key, activity_key = "quoted_status", "twitter_quoted_status"
    elif kind == "retweet":
        original_key, activity_key = "retweeted_status", "object"
    else:
        return None

    embedded_key = original_key if is_original_format(tweet) else activity_key
    return get_full_text(tweet[embedded_key])
Beispiel #19
0
def get_name(tweet):
    """
    Return the posting user's display name.

    Reads tweet["user"]["name"] for original format and
    tweet["actor"]["displayName"] otherwise.
    """
    if not is_original_format(tweet):
        return tweet["actor"]["displayName"]
    return tweet["user"]["name"]
Beispiel #20
0
def get_quote_or_rt_text(tweet):
    """
    Return the quoted or retweeted text in a Tweet
    (this is not the text entered by the posting user).

    - tweet: empty string (there is no quoted or retweeted text)
    - quote: only the text of the quoted Tweet
    - retweet: the text of the retweeted Tweet

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        str: text of the retweeted-tweet or the quoted-tweet
        (empty string if this is an original Tweet)

    Example:
        >>> from tweet_parser.getter_methods.tweet_text import get_quote_or_rt_text
        >>> # a quote tweet
        >>> quote = {"created_at": "Wed May 24 20:17:19 +0000 2017",
        ...          "text": "adding my own commentary",
        ...          "truncated": False,
        ...          "quoted_status": {
        ...                 "created_at": "Mon May 01 05:00:05 +0000 2017",
        ...                 "truncated": False,
        ...                 "text": "an interesting Tweet"
        ...                }
        ...         }

        >>> get_quote_or_rt_text(quote)
        'an interesting Tweet'
    """
    kind = get_tweet_type(tweet)
    if kind == "tweet":
        return ""
    if kind == "quote":
        embedded_key = ("quoted_status" if is_original_format(tweet)
                        else "twitter_quoted_status")
        return get_full_text(tweet[embedded_key])
    if kind == "retweet":
        embedded_key = ("retweeted_status" if is_original_format(tweet)
                        else "object")
        return get_full_text(tweet[embedded_key])
    return None
Beispiel #21
0
def get_retweet(tweet):
    """
    Return the retweeted Tweet's dict, or None if `tweet` is not a retweet.

    The embedded payload lives under "retweeted_status" in original format
    and under "object" otherwise.
    """
    if tweet.tweet_type != "retweet":
        return None
    embedded_key = "retweeted_status" if is_original_format(tweet) else "object"
    return tweet[embedded_key]
Beispiel #22
0
def get_all_text(tweet):
    """
    Return all of the text of the tweet, joined by newlines.

    Combines the user-entered text and the quote/retweet text; for
    original-format tweets the poll options are appended as well.
    Empty pieces are dropped before joining.
    """
    with_poll_options = is_original_format(tweet)
    pieces = [tweet.user_entered_text, tweet.quote_or_rt_text]
    if with_poll_options:
        pieces.append("\n".join(tweet.poll_options))
    return "\n".join(piece for piece in pieces if piece)
Beispiel #23
0
def get_user_mentions(tweet):
    """
    Return the list of @ mention dicts from the tweet.

    Reads "user_mentions" from "entities" (original format) or
    "twitter_entities" (activity-streams); a stored None becomes [].
    """
    entities_key = "entities" if is_original_format(tweet) else "twitter_entities"
    mentions = tweet[entities_key]["user_mentions"]
    return [] if mentions is None else mentions
Beispiel #24
0
def get_klout_profile(tweet):
    """
    Return the user's Klout profile URL, or None when the expected keys
    are missing from the payload.
    """
    try:
        if is_original_format(tweet):
            return tweet['user']['derived']['klout']['profile_url']
        return tweet['gnip']['klout_profile']['link']
    except KeyError:
        # No Klout data attached to this tweet.
        return None
Beispiel #25
0
def get_klout_score(tweet):
    """
    Return the user's Klout score, or None when the expected keys are
    missing from the payload.
    """
    try:
        if is_original_format(tweet):
            return tweet['user']['derived']['klout']['score']
        return tweet['gnip']['klout_score']
    except KeyError:
        # No Klout data attached to this tweet.
        return None
Beispiel #26
0
def get_klout_id(tweet):
    """
    Return the user's Klout id, or None when the expected keys are
    missing from the payload.
    """
    try:
        if is_original_format(tweet):
            return tweet['user']['derived']['klout']['user_id']
        return tweet['gnip']['klout_profile']['klout_user_id']
    except KeyError:
        # No Klout data attached to this tweet.
        return None
Beispiel #27
0
def get_quote_tweet(tweet):
    """
    Return the quoted Tweet's dict, or None if `tweet` is not a quote.

    The embedded payload lives under "quoted_status" in original format
    and under "twitter_quoted_status" otherwise.
    """
    if tweet.tweet_type != "quote":
        return None
    embedded_key = ("quoted_status" if is_original_format(tweet)
                    else "twitter_quoted_status")
    return tweet[embedded_key]
Beispiel #28
0
def get_quoted_mentions(tweet):
    """
    Return the @ mention dicts of the quoted Tweet.

    Mentions inside the quoted Tweet are not part of the outer tweet's own
    mentions, so this getter exposes them separately. Returns an empty
    list when `tweet` is not a quote.
    """
    if tweet.tweet_type != "quote":
        return []
    quoted_key = ("quoted_status" if is_original_format(tweet)
                  else "twitter_quoted_status")
    return get_user_mentions(tweet[quoted_key])
Beispiel #29
0
def get_hashtags(tweet):
    """
    Return the text of every hashtag in the tweet.

    Reads "hashtags" from "entities" (original format) or
    "twitter_entities" (activity-streams format).

    Args:
        tweet (Tweet or dict): A Tweet object or dictionary

    Returns:
        list: the "text" value of each hashtag entry, or an empty list
        when the stored hashtags value is None
    """
    entities_key = "entities" if is_original_format(tweet) else "twitter_entities"
    # Guard on the hashtags themselves; the presence or absence of
    # user mentions has no bearing on whether hashtags can be read.
    hashtags = tweet[entities_key]["hashtags"]
    if hashtags is None:
        return []
    return [tag["text"] for tag in hashtags]
Beispiel #30
0
def get_full_text(tweet):
    """
    Return the full text of a tweet dict, or of the sub-dict embedded in a
    quote/RT.

    Original format: the extended tweet's "full_text" when the tweet is
    flagged as truncated, otherwise "text". Activity-streams format: the
    "body" of "long_object" when present, otherwise the tweet's own "body".
    """
    if is_original_format(tweet):
        if not tweet["truncated"]:
            return tweet["text"]
        return tweet["extended_tweet"]["full_text"]

    # Prefer the long-form object when the payload carries one.
    body_holder = tweet["long_object"] if "long_object" in tweet else tweet
    return body_holder["body"]