def setup(self):
    self.player = Player()
    self.capitol = Capitol()
    self.coequipier = None
    self.gui = Gui(0, CONST.MAX_VOTES)
    self.supporters = []
    self.bullets = []
    self.ui_manager = UIManager()
    self.tweet = Tweet()
    self.strike_button = Strike(center_x=CONST.STRIKE_BUTTON_X,
                                center_y=CONST.STRIKE_BUTTON_Y,
                                normal_texture=self.button_normal,
                                hover_texture=self.button_hovered_texture,
                                text='')
    self.ui_manager.add_ui_element(self.strike_button)
    arcade.set_background_color(arcade.color.AMAZON)
    self.background = arcade.load_texture("tileset/background.png")
    self.music_list = ["audios/background_music.mp3"]
    self.current_song_index = 0
    self.music = arcade.Sound(self.music_list[self.current_song_index], streaming=True)
    self.play_song()
def query_tweets(self, query, sequence=0):
    logger.info('Searching: %s' % query)
    since_id = 0
    try:
        if sequence == 0:
            search_results = self.api.search.tweets(q=query, count=100, result_type='recent')
        else:
            search_results = self.api.search.tweets(q=query, count=100, since_id=since_id)
    except Exception:
        return
    statuses = search_results['statuses']
    for _tweet in statuses:
        if _tweet.get('timeout', False):
            raise TweetCollectorTimeoutException('Timeout')
        if not self.is_tweet_valid(_tweet):
            continue
        # Track the highest tweet id seen in this batch
        if since_id == 0 or since_id < _tweet['id']:
            since_id = _tweet['id']
        tweet = Tweet(str(_tweet['id']),
                      int(time.mktime(time.strptime(_tweet['created_at'], ts_format))),
                      _tweet['text'],
                      _tweet['user']['screen_name'],
                      sa.get_sentiment(self.clean(_tweet['text'])))
        tweet.save()
def tweetAnalyser(search_topic):
    tweet_handler = Tweet(search_topic)
    public_tweets = tweet_handler.getTweets()
    tweet_dict = {}
    i = 0
    for t in public_tweets:
        temp_tweet = Unwanted72(t.text)
        tweet_blob = TextBlob(temp_tweet)
        try:
            tweet_blob = tweet_blob.translate(from_lang=t.lang, to='en')
        except Exception:
            pass
        tweet_sentiment = tweet_blob.sentiment
        temp_dict = {}
        temp_dict['user'] = t.user.screen_name
        temp_dict['text'] = t.text
        temp_dict['polarity'] = tweet_sentiment.polarity
        temp_dict['subjectivity'] = tweet_sentiment.subjectivity
        tweet_dict["tweet" + str(i)] = temp_dict
        i += 1
    return json.dumps(tweet_dict)
def automaticCharts(config, dir, args):
    if args.lazy:
        print("Will skip generating or tweeting charts but will update the chart tracker")
        dataAccess = MySqlDataAccess(config)
        ChartTracker.updateTrackerToLastAvailable(dataAccess, config, dir, 'Electricity')
        ChartTracker.updateTrackerToLastAvailable(dataAccess, config, dir, 'Gas')
    elif args.folders:
        print('Generating the folders for the charts')
        makeImagesFoldersIfMissing(dir)
    else:
        print("Will just generate the charts")
        # Maybe only run this if a specific flag is passed in so that it isn't
        # constantly checking every time the script is run
        dataAccess = MySqlDataAccess(config)
        listDicChartsElec = ChartTracker.generateIfAvailable(
            dataAccess, config, dir, 'Electricity')
        listDicChartsGas = ChartTracker.generateIfAvailable(
            dataAccess, config, dir, 'Gas')
        listDicCharts = listDicChartsElec + listDicChartsGas
        if args.tweet:
            print("Tweeting the charts after generating them")
            chirp = Tweet(config)
            chirp.postBatch(listDicCharts)
def get_favorites(self, twitter_id):
    favorites = []
    try:
        for i in range(1, 7):
            check1 = True
            while check1:
                try:
                    favs = self.twitter_connection.twitter_api.favorites(id=twitter_id, page=i)
                    check1 = False
                    for fav in favs:
                        tweet = Tweet()
                        tweet.source_id = twitter_id
                        tweet.text = fav.text
                        tweet.target_id = fav.author.id
                        favorites.append(tweet)
                    if len(favorites) == 100:
                        break
                    check1 = False
                except Exception:
                    # On an API failure, rotate credentials and retry
                    self.twitter_connection.change_connection_keys()
    except Exception:
        return favorites
    return favorites
def __init__(self, wait_time):
    self.WAIT_TIME = wait_time
    self.FUNCTIONS = Functions(self.SECTION + '.db')
    self.TWEET = Tweet()
    self.full_path = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe())))
    with open(self.full_path + '/settings.json') as data_file:
        self.RSS_URLS = json.load(data_file)[self.SECTION]["urls"]
def get_trends(self, text_query, count):
    tt_buff = list()
    rand_id = round(random() * 1000)
    for t in tweepy.Cursor(self.api.search,
                           q=text_query + ' -filter:retweets',
                           lang='en').items(count):
        tweet = Tweet(rand_id, text_query, str(t.id), t._json["lang"], t._json["text"])
        tt_buff.append(tweet)
    Tweet.save_all_to_db(tt_buff)
    return [self.get_tweet_html(int(t.tweet_id)) for t in tt_buff[:10]], rand_id
def on_status(self, status):
    if self.canTweet:
        tweet = Tweet(status)
        if tweet.isAcceptable():
            sent = Sentence(tweet.oneLine)
            # print(sent)
            if isPoem(sent):
                formatted = formatPoem(sent)
                # Check for a direct duplicate of the tweet text (with \n's)
                if formatted != tweet.cleaned:
                    self.api.update_status(
                        "{}\nA #lifepoem by @{}".format(formatted, tweet.user),
                        tweet.id)
                    self.api.create_favorite(tweet.id)
class HackerNews:
    CONFIG = ConfigParser.ConfigParser()
    SECTION = 'hacker_news'

    def __init__(self, wait_time):
        self.WAIT_TIME = wait_time
        self.full_path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        with open(self.full_path + '/settings.json') as data_file:
            self.settings = json.load(data_file)[self.SECTION]
        self.APIURL = self.settings["apiurl"]
        self.TOPSTORIESURL = self.settings["topstoriesurl"]
        self.COMMENT = self.settings["comment"]
        self.FUNCTIONS = Functions(self.SECTION + '.db')
        self.TWEET = Tweet()

    def tweet_stories(self, story_count):
        data = {}
        topstories = self.FUNCTIONS.get_data(self.TOPSTORIESURL)
        h = HTMLParser.HTMLParser()
        unseen = [(rank, storyid)
                  for (rank, storyid) in enumerate(topstories[0:story_count])
                  if not self.FUNCTIONS.seen(storyid)]
        for (rank, storyid) in unseen:
            if rank not in data:
                jsonurl = "{0}item/{1}.json".format(self.APIURL, storyid)
                data[rank] = self.FUNCTIONS.get_data(jsonurl)
                title = data[rank]['title'].encode('utf-8')
                # str.replace returns a new string, so the result must be rebound
                title = title.replace("Show HN: ", "")
                if 'url' in data[rank]:
                    url = h.unescape(data[rank]['url']).encode('utf-8')
                else:
                    url = ''
                comment_url = "{0}{1}".format(self.COMMENT, storyid)
                if url != '':
                    self.TWEET.tweet(title, url, False)
                else:
                    self.TWEET.tweet(title, comment_url, False)
                self.FUNCTIONS.write_to_seen(storyid)
                time.sleep(randint(0, self.WAIT_TIME))
            else:
                print "No tweets to send " + str(
                    datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y"))
def get_tweet_with_time(hashtagSearched):
    myTweet = Tweet(hashtagSearched)
    tweets = myTweet.getTweetWithTime()
    result = tweets.to_dict(orient='index')
    result = {str(k): v for k, v in result.items()}
    listTweet = []
    for tweet in result.keys():
        listTweet.append(result[tweet])
    return jsonify({'tweets': listTweet})
def get_negAndpos(tweets):
    lem_pos = Tweet.get_all_messages_sentiment(tweets, Tweet.POSITIVE)
    lem_neg = Tweet.get_all_messages_sentiment(tweets, Tweet.NEGATIVE)
    lem_neu = Tweet.get_all_messages_sentiment(tweets, Tweet.NEUTRAL)
    lem_vneg = Tweet.get_all_messages_sentiment(tweets, Tweet.VERYNEGATIVE)
    lem_vpos = Tweet.get_all_messages_sentiment(tweets, Tweet.VERYPOSITIVE)
    all_pos = ' '.join(lem_pos)
    all_neg = ' '.join(lem_neg)
    all_neutral = ' '.join(lem_neu)
    all_vneg = ' '.join(lem_vneg)
    all_vpos = ' '.join(lem_vpos)
    return [all_pos, all_neg, all_neutral, all_vpos, all_vneg]
class RSS:
    CONFIG = ConfigParser.RawConfigParser()
    SECTION = 'rss'

    def __init__(self, wait_time):
        self.WAIT_TIME = wait_time
        self.FUNCTIONS = Functions(self.SECTION + '.db')
        self.TWEET = Tweet()
        self.full_path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        with open(self.full_path + '/settings.json') as data_file:
            self.RSS_URLS = json.load(data_file)[self.SECTION]["urls"]

    def tweet_stories(self, story_count):
        # For each URL defined in the config, get the feed data
        for url in self.RSS_URLS:
            print "Getting data from " + str(url) + "\n"
            data = feedparser.parse(url)
            if data == False:
                # If it returns no data, continue onto the next URL
                continue
            print "Got data \n"
            # Posts are "children" of the main request
            children = data['entries']
            counter = 0
            for post in children:
                postid = data['feed']['title'] + "_" + str(post['id'])
                if counter >= story_count:
                    # Only use a certain number of posts
                    break
                counter += 1
                if self.FUNCTIONS.seen(postid):
                    # If a post has been posted already, move to the next one
                    # and reset the counter
                    counter -= 1
                    continue
                url = post['link']
                title = post['title'].encode('utf-8')
                self.TWEET.tweet(title, url, False)
                self.FUNCTIONS.write_to_seen(postid)
                time.sleep(randint(0, self.WAIT_TIME))
def get_tweet():
    # Create a single tweet as an example
    myTweet = Tweet()
    # Analyze its sentiment
    myTweet.sentiment_analyze_tweet()
    # Wrap it in a list
    tweets = []
    tweets.append(myTweet.serialize())
    # Return the response to the server
    return jsonify({'tweets': tweets})
def get_analysis_data(cls, text_query, rand_id):
    negative = 0
    positive = 0
    neutral = 0
    sentiment_list = Tweet.query.filter_by(tag=text_query, rand_id=rand_id) \
                                .with_entities(Tweet.sentiment).all()
    for i in sentiment_list:
        if i[0] <= -0.31:
            negative += 1
        elif -0.31 < i[0] < 0.31:
            neutral += 1
        else:
            positive += 1
    Tweet.del_all_by_key(text_query, rand_id)
    return [negative, positive, neutral]
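Aside: the ±0.31 cutoffs in get_analysis_data partition the continuous sentiment score into three buckets. A minimal standalone sketch of that rule; the threshold comes from the snippet above, while the function name and sample scores are illustrative:

# Standalone sketch of the bucketing rule used in get_analysis_data.
def bucket(score, cutoff=0.31):
    if score <= -cutoff:
        return "negative"
    if score < cutoff:
        return "neutral"
    return "positive"

assert bucket(-0.5) == "negative"
assert bucket(0.0) == "neutral"
assert bucket(0.31) == "positive"  # the boundary itself counts as positive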
def __init__(self, wait_time):
    self.WAIT_TIME = wait_time
    self.full_path = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe())))
    with open(self.full_path + '/settings.json') as data_file:
        self.settings = json.load(data_file)[self.SECTION]
    self.APIURL = self.settings["apiurl"]
    self.TOPSTORIESURL = self.settings["topstoriesurl"]
    self.COMMENT = self.settings["comment"]
    self.FUNCTIONS = Functions(self.SECTION + '.db')
    self.TWEET = Tweet()
def retrieving_tweets_polarity(symbol):
    # nltk.download('punkt')
    auth = tweepy.OAuthHandler(ct.consumer_key, ct.consumer_secret)
    auth.set_access_token(ct.access_token, ct.access_token_secret)
    user = tweepy.API(auth, wait_on_rate_limit=True)
    tweets = tweepy.Cursor(user.search, q=str(symbol),
                           tweet_mode='extended', lang='pt').items(ct.num_of_tweets)
    tweet_list = []
    global_polarity = 0
    for tweet in tweets:
        tw = tweet.full_text
        blob = TextBlob(tw)
        polarity = 0
        for sentence in blob.sentences:
            polarity += sentence.sentiment.polarity
            global_polarity += sentence.sentiment.polarity
        tweet_list.append(Tweet(tw, polarity))
    if len(tweet_list) > 0:
        global_polarity = global_polarity / len(tweet_list)
    return global_polarity
def ordenar(self):
    twitter_data = open('twitter-data.bin', 'rb')
    twitter_data.seek(0, 2)
    tamanho = twitter_data.tell()
    twitter_data.seek(0, 0)
    position = 0
    while position < tamanho:
        line = str(twitter_data.read(502).decode('utf-8'))
        line = line.split(';')
        DADOS.append(
            Tweet(id=int(line[1]), len=line[0], user=line[2],
                  userLocation=line[3], tweetText=line[4],
                  hashtags=line[7], date=line[5], likes=line[6]))
        position += 502
        twitter_data.seek(position, 0)
    twitter_data.close()
    tweets_ordenados = sorted(DADOS, key=Tweet.get_id)
    self.store_ordered_file(tweets_ordenados)
def on_data(self, data):
    """ Append tweets as json into the "tweets.json" file. """
    # Check tweet limit
    enough_tweet = self.tweet_limit <= self.tweet_count
    if not enough_tweet:
        tweet = Tweet(data=data,
                      rm_stop_words=self.rm_stop_words,
                      inflect_nb_to_words=self.inflect_nb_to_words,
                      tracked_patterns=self.tracked_patterns,
                      tweet_min_len=self.tweet_min_len,
                      tweet_format=self.tweet_format,
                      only_retweeted=self.only_retweeted,
                      match_reply=self.match_reply)
        # Keep only tweets in the right format, based on:
        # tracked patterns and min length
        if tweet.complies:
            if self.match_reply:
                origin_tweet = self.find_origin_tweet(tweet)
                if origin_tweet:
                    self.tweetDB.add_tweet(tweet)
                    self.tweetDB.add_tweet(origin_tweet)
                    self.__tweet_listened()
            else:
                self.tweetDB.add_tweet(tweet)
                self.__tweet_listened()
    # Quit app when the tweet count is reached
    else:
        self.__end_stream()
        return False
def tweet(sender):
    """
    Method to simulate a particular agent tweeting
    :param sender: Agent
    :return: Tweet
    """
    return Tweet(sender)
def get_tweets_with_time(username, collection_rounds=5):
    import tweepy
    from user_crawler import secrets
    from Tweet import Tweet

    # API keys
    consumer_key = secrets.consumer_key
    consumer_secret = secrets.consumer_secret
    access_key = secrets.access_key
    access_secret = secrets.access_secret

    # Set up auth
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    results = api.user_timeline(username, count=200, include_rts=1)
    counter = 0
    tweets = []
    # Repeat calls until the maximum number of rounds is reached
    while counter < collection_rounds:
        for i in range(0, len(results)):
            tweet = Tweet(status=results[i].text, timeinfo=results[i].created_at)
            tweets.append(tweet)
        results = api.user_timeline(username, max_id=results.max_id,
                                    count=200, include_rts=1)
        counter += 1
    return tweets
def setOfReplies(rootTweetID, outputJSON, tweetList):
    # Load in the JSON data
    jsonData = json.load(open(outputJSON, 'r'))
    statuses = jsonData["statuses"]
    maxID = jsonData["search_metadata"]["max_id"]
    # Need the smallest id
    smallID = maxID
    # Run through the potential set of replies and pull out the valid ones
    for status in statuses:
        # Check if it replies to the wanted tweet
        replyID = status["in_reply_to_status_id"]
        if replyID == rootTweetID:
            # The tweet is a reply to the root tweet
            tempTweet = Tweet()
            tweetList += [tempTweet]
            # Now grab and parse all the given information (most likely standard)
            JSONReader.parseStandardInfo(tweetList[-1], status)
        if status["id"] < smallID:
            smallID = status["id"]
    # max id can be easily grabbed, so return it
    return [smallID, len(statuses)]
def get_tweets(self, is_retweet, twitter_id):
    tweets = []
    checkstuff = 0
    if is_retweet:
        numberOfPage = 50
    else:
        # ordinary tweets
        numberOfPage = 20
    for i in range(1, numberOfPage):
        if checkstuff:
            break
        check1 = True
        while check1:
            try:
                stuff = self.twitter_connection.twitter_api.user_timeline(id=twitter_id, page=i)
                if len(tweets) > 100:
                    break
                if len(stuff) == 0:
                    checkstuff = True
                    break
                for tw in stuff:
                    tweet = Tweet()
                    if is_retweet:
                        # Identify retweets by their "RT" prefix (the original
                        # compared characters with `is`, which is unreliable)
                        if tw.text.startswith("RT"):
                            author = tw.text[4:].split(':')[0]
                            try:
                                author_id = self.twitter_connection.twitter_api.get_user(
                                    screen_name=author).id
                                tweet.source_id = twitter_id
                                tweet.target_id = author_id
                                tweet.text = tw.text
                                tweets.append(tweet)
                            except Exception:
                                pass
                    else:
                        if not tw.text.startswith("RT"):
                            tweet.text = tw.text
                            tweet.source_id = twitter_id
                            tweet.target_id = 0
                            tweet.created_at = tw.created_at
                            tweets.append(tweet)
                check1 = False
            except Exception:
                self.twitter_connection.change_connection_keys()
    return tweets
def main():
    # Tweet list
    tweet = []
    fileName = "tweets.dat"

    while True:
        print('Tweet Menu\n')
        print('----------\n')
        print('1. Make a Tweet\n')
        print('2. View Recent Tweets\n')
        print('3. Search Tweets\n')
        print('4. Quit\n')
        # Getting user input
        try:
            choice = int(input('What would you like to do? '))
            if choice < 0 or choice > 4:
                print('Please select a valid option')
                continue
        except ValueError:
            print('Please enter a valid option')
        else:
            if choice == 1:
                author = input('What is your name? ')
                text = input('What would you like to tweet? ')
                # Re-prompt until the tweet fits in 140 characters
                while len(text) > 140:
                    print('Your tweet has exceeded the 140 character limit')
                    text = input('What would you like to tweet? ')
                tweet_object = Tweet(author, text)
                tweet.append(tweet_object)
                print(author, 'your tweet was saved.')
            elif choice == 2:
                print('Recent Tweets\n')
                print('-------------\n')
                if len(tweet) == 0:
                    print('There are no recent tweets')
                else:
                    # Show at most the five most recent tweets
                    minimum = -1
                    if len(tweet) > 5:
                        minimum = len(tweet) - 6
                    for x in range(len(tweet) - 1, minimum, -1):
                        print(tweet[x])
            elif choice == 3:
                search = input('What would you like to search for? ')
                key = 0
                for x in range(len(tweet) - 1, -1, -1):
                    if search in tweet[x].get_text():
                        key = 1
                        print(tweet[x])
                if key == 0:
                    print('No tweets contained ', search)
            elif choice == 4:
                if len(tweet) > 0:
                    outfile = open(fileName, 'a')
                    outfile.close()
                print('Thanks for using the Tweet Manager!')
                break
def post(self, username, text):
    tweet = Tweet(text)
    user = User.query.filter_by(username=username).first()
    if user is None:
        return jsonify([False])
    user.tweets.append(tweet)
    db.session.add(user)
    db.session.commit()
    return jsonify([True])
def get_interactions_for_user(self, user_id):
    user_obj = User(user_id)
    query_string = {self.query: int(user_id)}
    tweets = self.read_collection.find(query_string)
    print(tweets.count())
    for t in tweets:
        tweet = Tweet(t)
        user_obj.add_connections(tweet)
    return user_obj
def loadTweets(self):
    """Load Tweets from xboxData.csv into self._tweets, bucketed by a
    4-bit feature index.

    The index is computed from four features -- [length of author name,
    length of tweet, number of hashtags, number of mentions] -- where each
    bit is set by comparing the feature to a predefined threshold
    (trsh1-trsh4), giving 16 possible buckets.
    """
    trsh1 = 1
    trsh2 = 3
    trsh3 = 128
    trsh4 = 16
    csvFile = open('xboxData.csv')
    reader = csv.reader(csvFile)
    x = 0
    for row in reader:
        feat1 = 1 if int(row[3]) >= trsh1 else 0
        feat2 = 1 if int(row[4]) >= trsh2 else 0
        feat3 = 1 if int(row[5]) >= trsh3 else 0
        feat4 = 1 if int(row[6]) >= trsh4 else 0
        index = feat1 + 2 * feat2 + 4 * feat3 + 8 * feat4
        tw = Tweet(row[1], row[0][2:], row[2])
        tw._featureVector = index
        self._tweets[index].append(tw)
        x += 1
        # Cap the load at 2500 rows
        if x >= 2500:
            return
    return
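Aside: the index above is effectively a 4-bit mask (one bit per thresholded feature), yielding values 0-15. A minimal worked example under the same thresholds; the function name and sample values are illustrative, not part of the original code:

# Worked example of the 4-bit feature index used in loadTweets.
# Parameter order follows the comment there: [author-name length,
# tweet length, hashtag count, mention count]; sample values are made up.
def feature_index(author_len, tweet_len, n_hashtags, n_mentions,
                  thresholds=(1, 3, 128, 16)):
    bits = [author_len >= thresholds[0], tweet_len >= thresholds[1],
            n_hashtags >= thresholds[2], n_mentions >= thresholds[3]]
    # Bit i contributes 2**i, so the result lies in range(16)
    return sum(int(b) << i for i, b in enumerate(bits))

print(feature_index(12, 80, 2, 1))  # -> 3: only the first two thresholds are met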
def make_Tweet():
    name = str(input("\nWhat is your name?"))
    text = str(input("What would you like to tweet?"))
    while len(text) > 140:
        print("Tweets can only be 140 characters!")
        text = input("What would you like to tweet?")
    newTweet = Tweet(name, text, 0)
    timeline.append(newTweet)
def parse_corpus(filename):
    path = CorpusParser.set_path(filename)
    tweets = dict()
    with open(path, "r") as file:
        for line in file:
            if line == '\n':
                break
            tweet = Tweet(line)
            tweets[tweet.id] = tweet
    return tweets
def create_Tweet_object(self, search_result):
    media_urls = None
    geocode = None
    if MEDIA_URLS_ENABLED:
        media_urls = self.lookup_media_urls(search_result)
    if GEOCODES_ENABLED:
        geocode = self.lookup_geocode(search_result, geocoder)
    tweet = Tweet(search_result, media_urls, geocode)
    return tweet
def create_Tweet_objects(self, search_results=None):
    if search_results is None:
        raise TypeError('Search results argument required')
    elif not isinstance(search_results, list):
        raise TypeError('Search results argument must be a list of Tweets')
    tweets = []
    for search_result in search_results:
        tweet = Tweet(search_result, self.scorer)
        tweets.append(tweet)
    return tweets
def extract_tweets(self, response_json):
    for obj in response_json["data"]:
        self.tweets.append(
            Tweet(
                obj["id"],
                obj["text"],
                self.author,
                obj["created_at"],
                json.dumps(obj["public_metrics"]),
                obj["public_metrics"]["like_count"],
            ))
def startElement(self, name, attrs):
    self.pcharacters = ''
    if name == 'status' or name == 'retweeted_status':
        self.stack.append(Tweet())
    elif name == 'user':
        self.stack.append(User())
    elif name == 'hashtag':
        self.stack.append(self.Hashtag())
    elif name == 'user_mention':
        self.stack.append(self.Mention())
    elif name == 'place':
        self.ignore_object = 'place'
def searchSentimentByTweet(dbm, tweet):
    sentimentRes = Sentiment(dbm)
    if dbm is not None:
        res = dbm.runQuery(
            "SELECT tweetID, sentiment FROM Sentiment WHERE tweetID = {0}".format(tweet.tweetID))
        try:
            if res is not None:
                row = res[0]
                sentimentRes.set(Tweet.searchTweetById(dbm, row[0]), row[1])
        except Exception:
            pass
        return sentimentRes
    else:
        raise Exception("No dbm declared")
def getAllSentiments(dbm):
    allSentiments = []
    if dbm is not None:
        res = dbm.runQuery("SELECT tweetID, sentiment FROM Sentiment")
        for row in res:
            sentimentRes = Sentiment()
            tweet = Tweet.searchTweetById(dbm, row[0])
            sentimentRes.set(tweet, row[1])
            sentimentRes.dbm = dbm
            allSentiments.append(sentimentRes)
        return allSentiments
    else:
        raise Exception("No dbm declared")
def fetch_new_tweets():
    num_of_topics = 1
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)
    US_WOE_ID = 23424977
    WORLD_WOE_ID = 1
    tweet_list = []
    tweet_text_list = []
    us_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)
    # Collect the names of the top trending topics
    topics = []
    for i in range(num_of_topics):
        name = us_trends[0]["trends"][i]["name"]
        print name
        topics.append(name)
    total_count = 500
    count_per_search = 100
    topic_counter = 0
    for topic in topics:
        topic_counter += 1
        print "topic #", topic_counter
        count_fetched = 0
        max_id = -1
        while count_fetched < total_count:
            row_num = 0
            search_results = twitter_api.search.tweets(q=topic, count=count_per_search, max_id=max_id)
            statuses = search_results["statuses"]
            search_results_len = len(statuses)
            if search_results_len == 0:
                break  # no more results; avoid looping forever
            # Per-tweet processing, bounded by the number of statuses actually
            # returned so short pages don't raise an IndexError
            while row_num < count_per_search and row_num < search_results_len:
                t = Tweet()
                status = statuses[row_num]
                text = status["text"]
                t.text = text
                # Message-based features
                t.length_tweet = len(text)
                t.num_words = len(text.split())
                t.num_unique_chars = CommonUtil.count_unique_chars(text)
                t.num_hashtags = text.count("#")
                t.retweet_cnt = status["retweet_count"]
                max_id = status["id"]
                t.num_swear_words = CommonUtil.count_swear_words(text)
                t.num_at_emotions = text.count("@")
                # Source-based features
                user_features = status["user"]
                t.registration_age = CommonUtil.count_num_days_from_today(user_features["created_at"])
                t.num_followers = user_features["followers_count"]
                t.num_followee = user_features["friends_count"]
                if t.num_followee != 0:
                    t.ratio_foll_followee = t.num_followers / t.num_followee
                t.is_verified = 1 if user_features["verified"] else 0
                t.len_desc = len(user_features["description"])
                t.len_screen_name = len(user_features["screen_name"])
                if user_features["url"]:
                    t.has_url = 1
                # Pipe-separated feature string to write to file
                tweet_str = "|".join(str(v) for v in (
                    t.length_tweet, t.num_words, t.num_unique_chars,
                    t.num_hashtags, t.retweet_cnt, t.num_swear_words,
                    t.num_at_emotions, t.registration_age, t.num_followers,
                    t.num_followee, t.is_verified, t.len_desc,
                    t.len_screen_name, t.has_url))
                tweet_list.append(tweet_str)
                tweet_text_list.append(smart_str(text))
                row_num += 1
            count_fetched += search_results_len
    return tweet_list, tweet_text_list
while True:
    try:
        # Get all lines in the file
        line = jsonFile.readline()
        # The lines will contain a non-empty string until the eof
        # so we can break the loop when this happens
        if line == "":
            break
        # Translate the JSON string into python JSON representation
        jsonObject = decoder.decode(line)
        # Make a new tweet and add it to the set of tweets that we have
        # TODO: for some reason this doesn't work when the date is added. Must fix.
        # tweet = Tweet(jsonObject["text"], jsonObject["created_at"])
        tweet = Tweet(jsonObject["text"])
        tweetSet.append(tweet)
    # Some of the lines have encoding errors so ignore them
    except UnicodeDecodeError:
        pass
    except Exception:
        pass

print("--- Finished loading Tweets ---")
print("--- Printing size of Tweets ---")
# Print the number of tweets
print(" Size: " + str(len(tweetSet)))
print("--- Loading most common words in the Tweets ---")
# Make a list of all the unique words in the tweets which will be the columns of the matrix.
while True:
    try:
        # Get all lines in the file
        line = jsonFile.readline()
        # The lines will contain a non-empty string until the eof
        # so we can break the loop when this happens
        if line == "":
            break
        # Translate the JSON string into python JSON representation
        jsonObject = decoder.decode(line)
        # Make a new tweet and add it to the set of tweets that we have
        # TODO: for some reason this doesn't work when the date is added. Must fix.
        # tweet = Tweet(jsonObject["text"], jsonObject["created_at"])
        tweet = Tweet(jsonObject["text"])
        tweetSet.append(tweet)
    # Some of the lines have encoding errors so ignore them
    except UnicodeDecodeError:
        pass
    except Exception:
        pass

print("--- Finished loading Tweets ---")
print("--- Printing size of Tweets ---")
# Print the number of tweets
print(" Size: " + str(len(tweetSet)))
# print("--- Making list of unique words in the Tweets ---")
# Make a list of all the unique words in the tweets which will be the columns of the matrix.
num_positive = 0
num_negative = 0
for topic in topics:
    topic_counter += 1
    # print "topic #", topic_counter
    count_fetched = 0
    max_id = -1
    while count_fetched < total_count:
        row_num = 0
        search_results = twitter_api.search.tweets(q=topic, count=count_per_search, max_id=max_id)
        statuses = search_results["statuses"]
        search_results_len = len(statuses)
        # print "search_results_len", search_results_len
        # Per-tweet processing, bounded by the number of statuses actually
        # returned so short pages don't raise an IndexError
        while row_num < count_per_search and row_num < search_results_len:
            t = Tweet()
            status = statuses[row_num]
            text = status["text"]
            t.text = text
            # Message-based features
            t.length_tweet = len(text)
            t.num_words = len(text.split())
            t.num_unique_chars = CommonUtil.count_unique_chars(text)
            t.num_hashtags = text.count("#")
            t.retweet_cnt = status["retweet_count"]
            max_id = status["id"]
            t.num_swear_words = CommonUtil.count_swear_words(text)
            t.num_at_emotions = text.count("@")
def getNextWindow(self):
    """
    Retrieves the next window of Tweets and gives the previous list of
    Tweets also.
    Output: (self.currentTweets, oldBatch)
    """
    if self.eof == 1:
        return -1
    # Initialise time information for the iteration
    self.endTime = self.startTime + self.windowSize
    self.iterTime = self.startTime + self.batchSize
    # If empty then we have not yet got any Tweets
    index = 0
    oldBatch = []
    numberConsecutivePasses = 0
    currentPyTimestamp = 0
    while numberConsecutivePasses < 100 and currentPyTimestamp <= self.endTime:
        try:
            # Get all lines in the file
            line = self.jsonFile.readline()
            # The lines will contain a non-empty string until the eof
            # so we can break the loop when this happens
            if line == "":
                print("------ Reached the end of file ------")
                self.eof = 1
                break
            # Translate the JSON string into python JSON representation
            line = unicode(line, errors='replace')
            jsonObject = self.decoder.decode(line)
            tweet = Tweet(jsonObject["text"], jsonObject["created_at"])
            # Transform Tweet timestamps into Python time information
            currentTimestamp = datetime.strptime(tweet.date, '%a %b %d %H:%M:%S +0000 %Y')
            currentPyTimestamp = time.mktime(currentTimestamp.timetuple())
            tweet.PyTime = currentPyTimestamp
            # If we have more than the window size then pop the oldest off
            if not self.firstIter:
                while self.currentTweets[0].PyTime < self.startTime:
                    oldBatch.append(self.currentTweets.pop(0))
            # Add the Tweet to the list
            self.currentTweets.append(tweet)
            numberConsecutivePasses = 0
        # Some of the lines have encoding errors so ignore them
        except UnicodeDecodeError:
            numberConsecutivePasses += 1
            print 'Error'
        except Exception:
            numberConsecutivePasses += 1
            print 'Error'
    self.startTime = self.startTime + self.batchSize
    self.firstIter = False
    return (self.currentTweets, oldBatch)
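Aside: getNextWindow slides a [startTime, startTime + windowSize] window forward in batchSize steps, evicting Tweets older than the window start into oldBatch. A minimal sketch of that eviction policy on plain (timestamp, text) pairs; the function name, simplified order of operations, and sample data are assumptions for illustration:

# Sketch of the window-advance/eviction step from getNextWindow,
# using plain (timestamp, text) tuples instead of Tweet objects.
def advance_window(current, start_time, batch_size):
    start_time += batch_size               # slide the window forward one batch
    old_batch = []
    while current and current[0][0] < start_time:
        old_batch.append(current.pop(0))   # evict tweets older than the new start
    return current, old_batch, start_time

window = [(1.0, "a"), (2.0, "b"), (3.0, "c")]
window, old, start = advance_window(window, start_time=1.0, batch_size=1.5)
print(old)     # [(1.0, 'a'), (2.0, 'b')] were evicted
print(window)  # [(3.0, 'c')] stays in the window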
def fetch_tweets_for_topic(topic):
    total_count = 100
    count_per_search = 100
    max_id = -1
    tweet_list = []
    tweet_text_list = []
    count_fetched = 0
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)
    while count_fetched < total_count:
        row_num = 0
        search_results = twitter_api.search.tweets(q=topic, count=count_per_search, max_id=max_id)
        statuses = search_results["statuses"]
        search_results_len = len(statuses)
        if search_results_len == 0:
            break  # no more results; avoid looping forever
        # Per-tweet processing, bounded by the number of statuses actually
        # returned so short pages don't raise an IndexError
        while row_num < count_per_search and row_num < search_results_len:
            t = Tweet()
            status = statuses[row_num]
            text = status["text"]
            text = smart_str(text)
            text = text.replace("\n", " ")
            text = text.replace("|", "")
            text = text.replace("$", "")
            t.text = text
            # Message-based features
            t.length_tweet = len(text)
            t.num_words = len(text.split())
            t.num_unique_chars = CommonUtil.count_unique_chars(text)
            t.num_hashtags = text.count("#")
            t.retweet_cnt = status["retweet_count"]
            max_id = status["id"]
            t.num_swear_words = CommonUtil.count_swear_words(text)
            t.num_at_emotions = text.count("@")
            # Source-based features
            user_features = status["user"]
            t.registration_age = CommonUtil.count_num_days_from_today(user_features["created_at"])
            t.num_followers = user_features["followers_count"]
            t.num_followee = user_features["friends_count"]
            if t.num_followee != 0:
                t.ratio_foll_followee = t.num_followers / t.num_followee
            t.is_verified = 1 if user_features["verified"] else 0
            t.len_desc = len(user_features["description"])
            t.len_screen_name = len(user_features["screen_name"])
            if user_features["url"]:
                t.has_url = 1
            # Pipe-separated feature string to write to file
            tweet_str = "|".join([text] + [str(v) for v in (
                t.length_tweet, t.num_words, t.num_unique_chars,
                t.num_hashtags, t.retweet_cnt, t.num_swear_words,
                t.num_at_emotions, t.registration_age, t.num_followers,
                t.num_followee, t.is_verified, t.len_desc,
                t.len_screen_name, t.has_url)])
            tweet_list.append(tweet_str)
            tweet_text_list.append(smart_str(text))
            row_num += 1
        count_fetched += search_results_len
    # Write the tweets to a file
    outfile = open("test_tweets.txt", "w")
    for tweet in tweet_list:
        outfile.write(tweet)
        outfile.write("\n")
    outfile.close()
    # Convert the tweet texts to a single "$"-separated string
    tweet_text_str = ""
    for tweet in tweet_text_list:
        tweet_text_str = tweet_text_str + "$" + tweet
    return tweet_text_str
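Aside: fetch_new_tweets and fetch_tweets_for_topic both page through search results via max_id. A minimal sketch of that pagination loop; fetch_page is a hypothetical callable standing in for twitter_api.search.tweets, and the min-minus-one step is the conventional refinement rather than what the snippets above do:

# Sketch of max_id pagination; `fetch_page` is a hypothetical callable.
def paginate(fetch_page, total_count, per_page=100):
    fetched, max_id = [], None
    while len(fetched) < total_count:
        page = fetch_page(count=per_page, max_id=max_id)
        if not page:
            break                                  # no more results
        fetched.extend(page)
        # Step past the oldest id on this page; the -1 avoids re-fetching
        # the boundary tweet (the snippets above reuse the id as-is).
        max_id = min(item["id"] for item in page) - 1
    return fetched[:total_count]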
def test_tweet_creation():
    # pickle requires the file in binary mode
    twitter = pickle.load(open("test/data/singleTwitterStatus", "rb"))
    # Create the tweet object
    doaddoadTweet = Tweet(twitter)
    assert doaddoadTweet.status == twitter
    assert doaddoadTweet.get_language_code() == "en"
    proba_topid_pairs = [(predict_probas[i],
                          self.profile_terms[i]['topid'],
                          self.profile_terms[i]['title'])
                         for i in range(len(self.profile_terms))]
    max_proba_topid_pair = max(proba_topid_pairs, key=lambda t: t[0][2])
    relevant_topid = max_proba_topid_pair[1]
    relevant_score = max_proba_topid_pair[0][2]
    relevant_title = max_proba_topid_pair[2]
    return relevant_topid, relevant_score, relevant_title


if __name__ == '__main__':
    re = Relevance_estimate()
    import pickle
    f = open('result_0.pkl', 'rb')
    status0 = pickle.load(f)
    f.close()
    from Tweet import Tweet
    f = open('test0.txt', 'w')
    for s in status0:
        print >>f, '###########################################'
        print >>f, '###########################################'
        print >>f, '###########################################'
        print >>f, s[0]
        tweet = Tweet()
        tweet.load_tweepy(s[1])
        print >>f, tweet.text
        if tweet.crawl_link_text() == 'Error':
            continue
        print >>f, re.estimate(tweet)
    f.close()