def check_tweet(tweet, do_format_checking=False):
    """
    Verify that a payload looks like a Tweet and report which format it uses.

    The format is detected first via ``is_original_format``; afterwards the
    payload must contain an 'id' key plus the keys expected for the detected
    format ('user' and 'text' for original format, 'actor' and 'body'
    otherwise).

    Args:
        tweet: the payload to inspect; only membership tests (``in``) are
            performed on it here
        do_format_checking (bool): when true, additionally pass
            ``get_all_keys(tweet)`` to ``check_format`` together with the
            superset / minimum key sets of the detected format

    Returns:
        the value returned by ``is_original_format(tweet)``

    Raises:
        NotATweetError: if one of the required keys is missing
    """
    # Format detection happens before any key check, exactly as before.
    original_format = is_original_format(tweet)

    # Whatever the format, a Tweet must carry an id.
    if "id" not in tweet:
        raise NotATweetError(
            "This text has no 'id' key, it's probably not a Tweet")

    if not original_format:
        # Activity-streams payload: needs an actor and a body.
        if "actor" not in tweet:
            raise NotATweetError(
                "This text has no 'actor' key, it's probably not a Tweet")
        if "body" not in tweet:
            raise NotATweetError(
                "This text has no 'body' key, it's probably not a Tweet")
        # Optionally look for keys that changed or are unexpected.
        if do_format_checking:
            check_format(get_all_keys(tweet),
                         activity_streams_superset_keys,
                         activity_streams_minimum_set_keys)
        return original_format

    # Original-format payload: needs a user and a text.
    if "user" not in tweet:
        raise NotATweetError(
            "This text has no 'user' key, it's probably not a Tweet")
    if "text" not in tweet:
        raise NotATweetError(
            "This text has no 'text' key, it's probably not a Tweet")
    # Optionally look for keys that changed or are unexpected.
    if do_format_checking:
        check_format(get_all_keys(tweet),
                     original_format_superset_keys,
                     original_format_minimum_set_keys)
    return original_format
def _check_original_format_tweet(tweet, validation_checking=False):
    """
    Check the keys an original-format tweet payload must carry.

    Args:
        tweet: the payload to inspect; only membership tests are performed
        validation_checking (bool): when true, also run
            ``key_validation_check`` on ``get_all_keys(tweet)`` against the
            original-format superset / minimum key sets

    Raises:
        NotATweetError: if 'user' is missing, or if neither 'text' nor
            'full_text' is present
    """
    # Stop at the first required key that is absent.
    absent = next((name for name in ["user"] if name not in tweet), None)
    if absent is not None:
        raise NotATweetError("This dict has no '{}' key".format(absent))

    # Either spelling of the text field is acceptable.
    if not ("text" in tweet or "full_text" in tweet):
        raise NotATweetError("This dict has no 'text' or 'full_text' key")

    # check for changing keys
    if validation_checking:
        key_validation_check(get_all_keys(tweet),
                             original_format_superset_keys,
                             original_format_minimum_set_keys)
def is_original_format(tweet):
    """
    Simple checker to flag the format of a tweet.

    Args:
        tweet (Tweet): tweet in question

    Returns:
        Bool: True when the payload has a 'created_at' key, False when it
        has a 'postedTime' key instead

    Raises:
        NotATweetError: if neither 'created_at' nor 'postedTime' is present

    Example:
        >>> import tweet_parser.tweet_checking as tc
        >>> tweet = {"created_at": 124125125125,
        ...          "text": "just setting up my twttr",
        ...          "nested_field": {"nested_1": "field", "nested_2": "field2"}}
        >>> tc.is_original_format(tweet)
        True
    """
    # Plain key lookups only; an earlier extra check was removed as excess
    # because this needs no operational optimization.
    if "created_at" in tweet:
        return True
    if "postedTime" in tweet:
        return False
    raise NotATweetError(
        "This dict has neither 'created_at' or 'postedTime' as keys")
def _check_activity_streams_tweet(tweet, validation_checking=False):
    """
    Check the keys an activity-streams tweet payload must carry.

    Args:
        tweet: the payload to inspect; only membership tests are performed
        validation_checking (bool): when true, also run
            ``key_validation_check`` on ``get_all_keys(tweet)`` against the
            activity-streams superset / minimum key sets

    Raises:
        NotATweetError: if 'actor' or 'body' is missing (the first missing
            key, in that order, is the one reported)
    """
    # Stop at the first required key that is absent.
    absent = next(
        (name for name in ["actor", "body"] if name not in tweet), None)
    if absent is not None:
        raise NotATweetError("This dict has no '{}' key".format(absent))

    # check for changing keys
    if validation_checking:
        key_validation_check(get_all_keys(tweet),
                             activity_streams_superset_keys,
                             activity_streams_minimum_set_keys)
def embedded_tweet(self):
    """
    Get the quote tweet or the retweet and return a tweet object of it.

    Returns:
        a ``Tweet`` built from ``tweet_embeds.get_embedded_tweet(self)``,
        or None when that lookup yields None

    Raises:
        NotATweetError: if building the ``Tweet`` raises NotATweetError; the
            new error message includes the payload and the original error
    """
    payload = tweet_embeds.get_embedded_tweet(self)
    if payload is None:
        return None
    try:
        return Tweet(payload)
    except NotATweetError as nate:
        message = (
            "The embedded tweet payload {} appears malformed. \nFailed with '{}'"
            .format(payload, nate))
        raise NotATweetError(message)
def retweet(self):
    """
    Get the retweet and return a tweet obj of the retweet.

    Returns:
        a ``Tweet`` built from ``tweet_embeds.get_retweet(self)``, or None
        when that lookup yields None

    Raises:
        NotATweetError: if building the ``Tweet`` raises NotATweetError; the
            new error message includes the original error
    """
    payload = tweet_embeds.get_retweet(self)
    if payload is None:
        return None
    try:
        return Tweet(payload)
    except NotATweetError as nate:
        message = (
            "The retweet payload appears malformed. Failed with '{}'"
            .format(nate))
        raise NotATweetError(message)
def is_original_format(tweet):
    """
    Helper function to categorize the format of a tweet.

    Args:
        tweet: an object exposing an ``original_format`` attribute, or a
            payload supporting membership tests

    Returns:
        ``tweet.original_format`` when that attribute exists; otherwise True
        if 'created_at' is a key of the payload and False if 'postedTime' is

    Raises:
        NotATweetError: if the attribute is absent and neither key is present
    """
    # An object that already knows its format answers for itself.
    if hasattr(tweet, "original_format"):
        return tweet.original_format

    # Otherwise the timestamp key tells the two formats apart.
    if "created_at" in tweet:
        return True
    if "postedTime" in tweet:
        return False
    raise NotATweetError(
        "This text has neither 'created_at' or 'postedTime' as keys, it's not a Tweet"
    )
def embedded_tweet(self):
    """
    Get the retweeted Tweet OR the quoted Tweet and return it as a Tweet object

    Returns:
        Tweet (or None, if the Tweet is neither a quote tweet or a Retweet):
        a Tweet representing the quote Tweet or the Retweet
        (see tweet_embeds.get_embedded_tweet, this is that value as a Tweet)

    Raises:
        NotATweetError: if embedded tweet is malformed; the message contains
            both the offending payload and the original error text
    """
    embedded_tweet = tweet_embeds.get_embedded_tweet(self)
    if embedded_tweet is None:
        return None
    try:
        return Tweet(embedded_tweet)
    except NotATweetError as nate:
        # Format the complete template in one call. Previously ``.format``
        # was attached only to the second of two concatenated literals, so
        # the first "{}" stayed unfilled, the payload was printed in the
        # "Failed with" slot and the underlying error was dropped.
        raise NotATweetError(
            "The embedded tweet payload {} appears malformed. Failed with '{}'"
            .format(embedded_tweet, nate))
def check_tweet(tweet, validation_checking=False):
    """
    Ensures a tweet is valid and determines the type of format for the tweet.

    Args:
        tweet (dict/Tweet): the tweet payload
        validation_checking (bool): check for valid key structure in a tweet.

    Returns:
        the value returned by ``is_original_format(tweet)``

    Raises:
        NotATweetError: if the payload has no 'id' key (the format detection
            and the per-format checker may raise as well)
    """
    if "id" not in tweet:
        raise NotATweetError("This text has no 'id' key")

    original_format = is_original_format(tweet)

    # Pick the checker that matches the detected format, then run it.
    run_checks = (_check_original_format_tweet if original_format
                  else _check_activity_streams_tweet)
    run_checks(tweet, validation_checking=validation_checking)
    return original_format
def retweeted_tweet(self):
    """
    The retweeted Tweet as a Tweet object

    If the Tweet is not a Retweet, return None.
    If the Retweet payload cannot be loaded as a Tweet, this will
    raise a `NotATweetError`.

    Returns:
        Tweet: A Tweet representing the retweeted status (or None)
        (see tweet_embeds.get_retweeted_tweet, this is that value as a Tweet)

    Raises:
        NotATweetError: if retweeted tweet is malformed
    """
    payload = tweet_embeds.get_retweeted_tweet(self)
    if payload is None:
        return None
    try:
        return Tweet(payload)
    except NotATweetError as nate:
        # Only the trailing fragment has a placeholder to fill.
        reason = " Failed with '{}'".format(nate)
        raise NotATweetError("The retweet payload appears malformed." + reason)