def __init__(self, tweet):
    """Populate this object from a decoded tweet dictionary.

    :param tweet: dict describing one tweet. Every key read below
        ("created_at", "id_str", "entities", "place", "user", "source",
        ...) must be present, otherwise a KeyError is raised.

    Attributes that are only set conditionally:
      * ``place`` and ``location_coordinates`` -- only when
        ``tweet["place"]`` is not None.
      * ``user`` -- only when ``tweet["user"]`` is not None.
    """
    self.created_at = tweet["created_at"]
    self.id_str = tweet["id_str"]
    self.entities = Entities(tweet["entities"])

    # Flag which known third-party services are linked from the tweet.
    self.external_sources = {"instagram": False, "foursquare": False, "swarm": False, "path": False}
    for url_data in self.entities.urls:
        match_external_sources = re.findall('instagram|swarmapp|foursquare|path', url_data['expanded_url'], re.I)
        if match_external_sources:
            # The search is case-insensitive and looks for "swarmapp", so
            # the raw match must be normalized before it is used as a key;
            # otherwise "Instagram"/"swarmapp" would add a stray key and
            # leave the predefined flag at False.
            service = match_external_sources[0].lower()
            if service == "swarmapp":
                service = "swarm"
            self.external_sources[service] = True

    self.in_reply_to_user_id_str = tweet["in_reply_to_user_id_str"]
    self.contributors = tweet["contributors"]
    self.text = tweet["text"]
    self.retweet_count = tweet["retweet_count"]
    self.in_reply_to_status_id_str = tweet["in_reply_to_status_id_str"]
    self.id = tweet["id"]
    self.in_reply_to_user_id = tweet["in_reply_to_user_id"]

    # Place
    # ISSUES:
    # 1 - Sometimes the check-in coordinates are not where the location really is.
    # 2 - Sometimes the place is a specific name, sometimes a city name or a country.
    # 3 - Match location names with coordinates?
    # 4 - Check if the location is within the bbox of each city? (Belem in first case)
    if tweet["place"] is not None:
        self.place = TwitterPlace(tweet["place"])
        # First ring of the bounding box; each vertex is read as
        # [longitude, latitude].
        ring = self.place.bounding_box["coordinates"][0]
        latitudes = [vertex[1] for vertex in ring]
        longitudes = [vertex[0] for vertex in ring]
        if len(set(latitudes)) == 1 and len(set(longitudes)) == 1:
            # Degenerate box: every vertex is the same point, use it as is.
            self.location_coordinates = (latitudes[0], longitudes[0])
        else:
            # Otherwise use the centre of the box. Min/max is used instead
            # of fixed vertex indices so the result does not depend on the
            # order in which the corners are listed.
            mean_lat = (min(latitudes) + max(latitudes)) / 2
            mean_lon = (min(longitudes) + max(longitudes)) / 2
            self.location_coordinates = (mean_lat, mean_lon)

    # User
    if tweet["user"] is not None:
        self.user = TwitterUser(tweet["user"])
    self.in_reply_to_screen_name = tweet["in_reply_to_screen_name"]

    # Treat source field: keep only the text of the first <a> element.
    self.source = tweet["source"]
    html = BeautifulSoup(self.source, "lxml")
    anchors = html.findAll("a")
    # When the source holds no anchor (or an empty one), keep the raw value
    # rather than failing with an IndexError.
    if anchors and anchors[0].contents:
        self.source = anchors[0].contents[0]
def reply_to_user(self):
    '''
    Find the user this Status is a direct response to.

    Returns a TwitterUser built from the first mention whose 'id' equals
    data['in_reply_to_user_id']. Returns None when that id is falsy or
    when no mention matches it.
    '''
    target_id = self.data['in_reply_to_user_id']
    if not target_id:
        return None

    for mention in self.mentions:
        if mention['id'] == target_id:
            return TwitterUser(mention)
    return None
def deserialize_user(raw_tweet):
    """
    Build a TwitterUser from the ``user`` section of a JSON-encoded tweet.

    :param raw_tweet: JSON text of a single tweet. Invalid JSON makes
        ``json.loads`` raise; that error is not caught here.
    :return TwitterUser: user with ``user_id``, ``user_name``,
        ``screen_name``, ``location`` and ``followers`` filled in. If an
        expected key is missing, a warning naming the key is logged and the
        user is returned with only the fields mapped before the failure.
    """
    user = TwitterUser()
    full_tweet = json.loads(raw_tweet)
    try:
        user_data = full_tweet['user']
        user.user_id = user_data['id']
        user.user_name = user_data['name']
        user.screen_name = user_data['screen_name']
        user.location = user_data['location']
        user.followers = user_data['followers_count']
    except KeyError as ke:
        # The placeholder was missing before, so the offending key was
        # silently dropped from the log message.
        log.warning('Could not map user: {}'.format(ke))
    return user
def recipient(self):
    '''Builds a TwitterUser from the DM's 'recipient' data and returns it'''
    payload = self.data['recipient']
    return TwitterUser(payload)
def sender(self):
    '''Builds a TwitterUser from the DM's 'sender' data and returns it'''
    payload = self.data['sender']
    return TwitterUser(payload)
def recipient(self):
    '''Builds a TwitterUser for the user being followed (the 'target' data)'''
    followed = self.data['target']
    return TwitterUser(followed)
def sender(self):
    '''Builds a TwitterUser for the follower (the 'source' data)'''
    follower = self.data['source']
    return TwitterUser(follower)
def sender(self):
    '''Builds a TwitterUser from this object's 'user' data and returns it'''
    author = self.data['user']
    return TwitterUser(author)