def get_tweets(self):
    """Search tweets for every term in ``self.search_words`` and store them.

    For each search term, up to ``self.n_tweets`` English-language results
    are pulled through a tweepy cursor, with retweets excluded by the
    ``-filter:retweets`` query operator. The text and creation timestamp of
    each hit are collected together with the term that produced it, and the
    resulting table (columns ``hashtag``, ``created_at``, ``tweet``) is
    handed to ``DataHandler.store_network_dataset``.
    """
    client = Api.twitter_api()

    # One (term, text, timestamp) record per tweet, in retrieval order.
    records = []
    for term in self.search_words:
        query = term + " -filter:retweets"  # drop retweets from the results
        cursor = tw.Cursor(client.search, q=query, lang='en')
        for status in cursor.items(self.n_tweets):
            records.append((term, status.text, status.created_at))

    frame = pd.DataFrame({
        "hashtag": [term for term, _, _ in records],
        "created_at": [stamp for _, _, stamp in records],
        "tweet": [body for _, body, _ in records],
    })

    handler = DataHandler('twitter', self.search_words)
    handler.store_network_dataset(frame)
def get_tweets(self):
    """Download the timeline of ``self.tt_user`` and store it as a dataset.

    The timeline is fetched in pages of up to 200 statuses (retweets and
    replies excluded, extended tweet mode on every page) by walking
    ``max_id`` backwards until a page comes back empty. Each status is
    flattened into one row holding tweet, author and quoted-tweet fields,
    and the resulting ``DataFrame`` is passed to
    ``DataHandler.store_network_dataset``.

    If the first page is empty, an empty dataset that still has every
    column is stored instead of raising.

    Fields that are absent on a status (no media, no quoted tweet, ...) are
    recorded as ``np.nan``.
    """
    api = Api.twitter_api()

    def fetch_page(max_id=None):
        """Request one timeline page, optionally bounded above by max_id."""
        params = {
            'screen_name': self.tt_user,
            'count': 200,
            # Requested on every page so later pages are not truncated.
            'tweet_mode': 'extended',
            'include_rts': False,
            'exclude_replies': True,
        }
        if max_id is not None:
            params['max_id'] = max_id
        return api.user_timeline(**params)

    def text_of(status):
        """Return the full text when present, else the plain text."""
        try:
            return status.full_text
        except AttributeError:
            return status.text

    def first_media_url(status):
        """Return the URL of the first attached media item, or NaN."""
        try:
            return status.entities['media'][0]['media_url']
        except (AttributeError, KeyError, IndexError, TypeError):
            return np.nan

    def quoted(status):
        """Return the quoted status, or None when none is attached."""
        if not status.is_quote_status:
            return None
        return getattr(status, 'quoted_status', None)

    def quoted_text(status):
        # A missing quoted status (None) also ends up here as AttributeError.
        try:
            return text_of(quoted(status))
        except AttributeError:
            return np.nan

    def quoted_user_id(status):
        try:
            return quoted(status).user.id
        except AttributeError:
            return np.nan

    # Page backwards through the timeline. max_id is inclusive, so ask for
    # ids strictly below the oldest status already collected (exactly one
    # decrement, so no id is skipped).
    tweets = []
    page = fetch_page()
    while page:
        tweets.extend(page)
        page = fetch_page(max_id=page[-1].id - 1)

    # Column name -> extractor; insertion order fixes the column order.
    extractors = {
        "created_at": lambda t: t.created_at,
        "tweet_id": lambda t: t.id,
        "text": text_of,
        "hashtags": lambda t: t.entities['hashtags'],
        "symbols": lambda t: t.entities['symbols'],
        "image_url": first_media_url,
        "user_mentions": lambda t: t.entities['user_mentions'],
        "user_id": lambda t: t.user.id,
        "user_name": lambda t: t.user.name,
        "user_screen_name": lambda t: t.user.screen_name,
        "user_location": lambda t: t.user.location,
        "user_description": lambda t: t.user.description,
        "user_protected": lambda t: t.user.protected,
        "user_followers_count": lambda t: t.user.followers_count,
        "user_friends_count": lambda t: t.user.friends_count,
        "user_listed_count": lambda t: t.user.listed_count,
        "user_created_at": lambda t: t.user.created_at.strftime("%Y-%m-%d"),
        "user_favourites_count": lambda t: t.user.favourites_count,
        "user_utc_offset": lambda t: t.user.utc_offset,
        "user_timezone": lambda t: t.user.time_zone,
        "user_geo_enabled": lambda t: t.user.geo_enabled,
        "user_verified": lambda t: t.user.verified,
        "user_statuses_count": lambda t: t.user.statuses_count,
        "user_lang": lambda t: t.user.lang,
        "user_contributors_enabled": lambda t: t.user.contributors_enabled,
        "user_is_translator": lambda t: t.user.is_translator,
        "user_is_translation_enabled":
            lambda t: t.user.is_translation_enabled,
        "quoted_text": quoted_text,
        "quoted_media": lambda t: first_media_url(quoted(t)),
        "quoted_user_id": quoted_user_id,
    }

    dataset = pd.DataFrame({
        column: [extract(tweet) for tweet in tweets]
        for column, extract in extractors.items()
    })

    store_data = DataHandler('twitter', self.tt_user)
    store_data.store_network_dataset(dataset)