class DataTrimmer:
    """Read Gnip search dumps, standardize their tweets and log tone results.

    Tweets come from JSON files named ``Gnip_Search_<index>.json`` under
    ``Gnip_Client/Gnip_Searches`` in the current working directory.  Each
    standardized tweet is passed to the tone analyzer and, when all five
    returned values are truthy, appended as one line to ``output.csv``.
    """

    def __init__(self):
        # Both collaborators are defined outside this block.
        self.tweet_processor = TweetProcessor()
        self.tone_analyzer = ToneAnalyzer()

    @staticmethod
    def get_file_location(index):
        """Return the path of search dump number *index* below the cwd."""
        return os.getcwd() + '/Gnip_Client/Gnip_Searches/Gnip_Search_' + str(index) + '.json'

    def load_json_blob(self, counter):
        """Parse and return the JSON document stored in dump *counter*."""
        file_path = self.get_file_location(counter)
        with open(file_path) as data_file:
            return json.load(data_file)

    @staticmethod
    def _append_emotions(text, emotions, path='output.csv'):
        """Append ``text, e0, e1, e2, e3, e4`` to *path* if all five are truthy."""
        # The file is opened before the values are inspected so that it is
        # created even when nothing ends up being written.
        with open(path, 'a+b') as analyzed_tweets:
            if all(emotions[k] for k in range(5)):
                line = ', '.join([text] + [emotions[k] for k in range(5)]) + '\n'
                # The file is binary: text must be encoded explicitly, otherwise
                # the write fails on Python 3 (and on non-ASCII unicode in
                # Python 2).  Byte strings are written unchanged.
                if not isinstance(line, bytes):
                    line = line.encode('utf-8')
                analyzed_tweets.write(line)

    def get_tweets(self, s, r):
        """Process dumps ``s`` .. ``r - 1`` and return the set of tweets seen.

        Every ``result['body']`` in a dump's ``'results'`` list is
        standardized, sent to the tone analyzer and recorded in ``output.csv``.

        Returns the set of standardized tweets from *all* requested dumps
        (an empty set for an empty range), or ``None`` as soon as any dump
        fails to load or process.
        """
        tweet_set = set()
        for i in range(s, r):
            try:
                print('At index ' + str(i))
                json_file = self.load_json_blob(i)
                for result in json_file['results']:
                    tweet = self.tweet_processor.standardize_tweet(result['body'])
                    emotions = self.tone_analyzer.query_ibm_for_tone(tweet)
                    tweet_set.add(tweet)
                    self._append_emotions(tweet, emotions)
            except Exception as exc:
                # Not only a missing "body" key ends up here (missing file,
                # analyzer failure, ...), so the real cause is printed too.
                print('Key "body" not found.')
                print('Cause: ' + repr(exc))
                return None
        # Returning only after the loop lets every index in the range be
        # visited; returning from inside the try stopped after the first dump.
        return tweet_set
class DataTrimmer:
    """Count standardized tweets from Gnip dumps and log their tone results.

    Tweet counts are cached in ``saved_dict.p`` (a pickle file in the current
    working directory); tone results are appended to ``output.csv``.
    """

    def __init__(self):
        # Both collaborators are defined outside this block.
        self.tweet_processor = TweetProcessor()
        self.tone_analyzer = ToneAnalyzer()

    @staticmethod
    def get_file_location(index):
        """Return the path of search dump number *index* below the cwd."""
        return os.getcwd() + '/Gnip_Client/Gnip_Searches/Gnip_Search_' + str(index) + '.json'

    def load_json_blob(self, counter):
        """Parse and return the JSON document stored in dump *counter*."""
        file_path = self.get_file_location(counter)
        with open(file_path) as data_file:
            return json.load(data_file)

    @staticmethod
    def _append_emotions(text, emotions, path='output.csv'):
        """Append ``text, e0, e1, e2, e3, e4`` to *path* if all five are truthy."""
        # The file is opened before the values are inspected so that it is
        # created even when nothing ends up being written.
        with open(path, 'a+b') as analyzed_tweets:
            if all(emotions[k] for k in range(5)):
                line = ', '.join([text] + [emotions[k] for k in range(5)]) + '\n'
                # The file is binary: text must be encoded explicitly, otherwise
                # the write fails on Python 3 (and on non-ASCII unicode in
                # Python 2).  Byte strings are written unchanged.
                if not isinstance(line, bytes):
                    line = line.encode('utf-8')
                analyzed_tweets.write(line)

    # 167553 total
    def get_tweets(self, s, r):
        """Return an ``OrderedDict`` mapping standardized tweet -> occurrences.

        If ``saved_dict.p`` can be opened, its unpickled content is returned
        and *s*/*r* are ignored.  Otherwise dumps ``s`` .. ``r - 1`` are read,
        counted in first-seen order, pickled to ``saved_dict.p`` and returned.
        Returns ``None`` as soon as any dump fails to load or process.
        """
        try:
            # NOTE: pickle executes arbitrary code on load; this file must
            # only ever be one that this program wrote itself.
            with open("saved_dict.p", "rb") as cache_file:
                return pickle.load(cache_file)
        except IOError:
            ordered_tweet_dict = OrderedDict()

        for i in range(s, r):
            try:
                print('At index ' + str(i))
                json_file = self.load_json_blob(i)
                for result in json_file['results']:
                    tweet = self.tweet_processor.standardize_tweet(result['body'])
                    ordered_tweet_dict[tweet] = ordered_tweet_dict.get(tweet, 0) + 1
            except Exception as exc:
                # Not only a missing "body" key ends up here, so the real
                # cause is printed as well.
                print('Key "body" not found.')
                print('Cause: ' + repr(exc))
                return None

        print("LENGTH: " + str(len(ordered_tweet_dict)))
        with open('saved_dict.p', 'wb') as cache_file:
            pickle.dump(ordered_tweet_dict, cache_file)
        return ordered_tweet_dict

    def write_emotions(self, tweet_dict, resume_after=57556):
        """Query the tone analyzer for entries of *tweet_dict* and log results.

        Only entries whose enumeration index is greater than *resume_after*
        are processed; the default keeps the previously hard-coded cut-off.
        A failure on one entry is reported and the loop continues.

        NOTE(review): the original layout of this method was lost; this
        assumes the index check gated the whole per-entry body (resuming an
        interrupted run) rather than only the progress print - confirm.
        """
        for i, entry in enumerate(tweet_dict):
            if i <= resume_after:
                continue
            try:
                print(i)
                emotions = self.tone_analyzer.query_ibm_for_tone(entry)
                self._append_emotions(entry, emotions)
            except Exception as exc:
                print('Error')
                print('Cause: ' + repr(exc))
def __init__(self):
    """Create the two helper objects this instance works with."""
    # Text-cleaning helper (TweetProcessor is defined outside this block).
    self.tweet_processor = TweetProcessor()
    # Tone-lookup helper (ToneAnalyzer is defined outside this block).
    self.tone_analyzer = ToneAnalyzer()
def test_remove_rt(self):
    # 'rt test' is expected to come back as 'test'; a tweet without the
    # 'rt' word is expected to come back unchanged.
    processor = TweetProcessor()
    for raw, expected in (('rt test', 'test'), ('no test', 'no test')):
        self.assertEqual(expected, processor.remove_rt(raw))
def test_replace_hashtag_with_word(self):
    # Every '#word' in the input is expected to be returned as plain 'word'.
    processor = TweetProcessor()
    raw = 'testing #go #buffs go #heat #nba'
    expected = 'testing go buffs go heat nba'
    self.assertEqual(expected, processor.replace_hashtag_with_word(raw))
def test_replace_at_with_word(self):
    # Every '@name' mention is expected to be replaced by the token 'USER'.
    processor = TweetProcessor()
    raw = '@fred tell @me about @twitter go @buffs'
    expected = 'USER tell USER about USER go USER'
    self.assertEqual(expected, processor.replace_at_with_word(raw))
def test_remove_url(self):
    # Both a 'www.' address and an 'https://' address are expected to be
    # replaced by the token 'URL'.
    processor = TweetProcessor()
    raw = 'testing www.google.com https://www.testing.com'
    expected = 'testing URL URL'
    self.assertEqual(expected, processor.remove_url(raw))
def test_remove_stop_words(self):
    # Only 'told' and 'tweet' are expected to survive from the input.
    processor = TweetProcessor()
    raw = 'i told me my now should during tweet'
    expected = 'told tweet'
    self.assertEqual(expected, processor.remove_stop_words(raw))
def test_remove_repeated_chars(self):
    # Long runs of one character are expected to shrink to two characters
    # ('ssss...' -> 'ss', 'tttt...' -> 'tt', 'gggg' -> 'gg').
    processor = TweetProcessor()
    raw = 'tessssssssssssssssttttttttingggg'
    expected = 'tessttingg'
    self.assertEqual(expected, processor.remove_repeated_chars(raw))
def test_remove_non_letter_and_space(self):
    # Punctuation and digits are expected to be dropped; letters and the
    # single space are expected to stay.
    processor = TweetProcessor()
    raw = 'test!@#$%^&*()12334567890 ing'
    expected = 'test ing'
    self.assertEqual(expected, processor.remove_non_letter_and_space(raw))
def test_remove_extra_whitespaces(self):
    # NOTE(review): as written here the input already equals the expected
    # output, so this only checks that single-spaced text is returned
    # unchanged - confirm whether the input was meant to contain runs of
    # whitespace.
    processor = TweetProcessor()
    raw = 'this is what it should look like'
    expected = 'this is what it should look like'
    self.assertEqual(expected, processor.remove_extra_whitespaces(raw))
def test_remove_appended_url_or_user(self):
    # A 'URL' or 'USER' token glued to the end of a word is expected to be
    # stripped from it.
    processor = TweetProcessor()
    raw = 'testURL tingUSER'
    expected = 'test ting'
    self.assertEqual(expected, processor.remove_appended_url_or_user(raw))
def test_standardize_tweet(self):
    # Runs the full standardization on one tweet containing an 'RT' marker,
    # mixed case, a mention, URLs and stretched words.
    processor = TweetProcessor()
    raw = ('RT So I justttt Wanted to @FReD'
           ' www.google.com tellll people that www.google.com')
    expected = 'justt wanted tell people'
    self.assertEqual(expected, processor.standardize_tweet(raw))
def test_check_words_in_tweet_can_return_false(self):
    # This sentence is expected to be rejected by check_words_in_tweet.
    processor = TweetProcessor()
    verdict = processor.check_words_in_tweet('no words should match our filter')
    self.assertEqual(False, verdict)
def test_check_words_in_tweet(self):
    # This sentence is expected to be accepted by check_words_in_tweet.
    processor = TweetProcessor()
    verdict = processor.check_words_in_tweet('i love the nba')
    self.assertEqual(True, verdict)