def selectClassesLimit(self, limit):
    """Fetch up to *limit* positive and *limit* negative tokenized tweets.

    Returns a tuple (positives, negatives) of Tweet lists built from the
    `tweets` table, considering only rows whose `tokens` field is non-empty.
    """
    # Validate up front so the value interpolated into the SQL below can
    # only ever be an integer (guards against SQL injection via `limit`).
    limit = int(limit)

    def _fetch(classe):
        # Shared helper replacing the copy-pasted positive/negative blocks.
        cur = self.__conn.cursor()
        try:
            sql = (""" SELECT id, tokens, original, classe, emojis """
                   """FROM tweets WHERE classe='{0}' and tokens != '' """
                   """limit {1}""").format(classe, limit)
            cur.execute(sql)
            rows = cur.fetchall()
        finally:
            # Close the cursor even if execute/fetch raises.
            cur.close()
        return [Tweet(row[0], row[1], row[2], row[3], row[4]) for row in rows]

    return _fetch('positive'), _fetch('negative')
def test_save_and_load_multiple_tweets(self):
    """Round-trip two tweets through TweetDA and compare the resulting lists."""
    usr = TwitterUser('@POTUS', 'white house',
                      'president of the united states',
                      'June 2009', '14k', '120')
    twt1 = Tweet('Donald Trump', usr.username, usr.user_id,
                 '2020-02-01T06:33:23.000Z', 'Rocketman!',
                 120, 3929, '', 300, 'image url', 'URL')
    twt2 = Tweet('Donald Trump', usr.username, usr.user_id,
                 '2020-03-01T08:33:23.000Z', 'NO!',
                 120, 3929, '', 300, 'image url', 'URL')
    expected = TweetList('donny')
    for twt in (twt1, twt2):
        expected.add_tweet(twt)
    data_access = TweetDA()
    data_access.save_list(expected)
    loaded = data_access.load_all(username='******')
    self.assertEqual(expected, loaded)
def readXML(xmlFIle):
    """Parse a sentiment-annotated XML file into a list of Tweet objects.

    Reads each <tweet> element's content, first sentiment polarity (mapped
    through polarityTagging), id, user, date and language. Tweets whose
    <content> is None are skipped.
    """
    tree = ET.parse(xmlFIle)
    root = tree.getroot()
    tweets = []
    # Renamed the loop variable: the original rebound `tweet` to the
    # constructed Tweet object inside the loop, shadowing the XML node.
    for node in root.iter('tweet'):
        content = node.find('content').text
        sentiments = node.find('sentiments')
        polarity = polarityTagging(sentiments[0].find('value').text)
        # polarity = polarityTagging3(polarity)
        # Other info:
        # int() replaces the Python-2-only long(); Python 3 ints are unbounded.
        tweet_id = int(node.find('tweetid').text)
        user = node.find('user').text
        date = node.find('date').text
        lang = node.find('lang').text
        if content is not None:
            tweets.append(tw.Tweet(tweet_id, user, date, lang, content, polarity))
    return tweets
def getTweets(username, nTweets):
    """Fetch up to *nTweets* recent tweets for *username*.

    Returns a dict mapping created_at -> Tweet.Tweet, with URLs and
    escaped-emoji sequences stripped from the text.
    """
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    # Gets the past 20 previous tweets
    new_tweets = api.user_timeline(screen_name=username, count=nTweets)
    tweets = [str(emoji.demojize(t.text.encode('unicode_escape')))
              for t in new_tweets]
    dateCSV = [t.created_at for t in new_tweets]
    allData = {}  # Key = Date, Value = tweet (Object from Tweet.py)
    # Iterate only over what we actually received instead of relying on an
    # IndexError swallowed by a bare except to detect the end of the timeline.
    for date, raw in zip(dateCSV[:nTweets], tweets[:nTweets]):
        text = re.sub(r'https:.*$', '', raw)           # Removes all URLs
        text = re.sub(r'(\\[a-zA-Z0-9]+)', '', text)   # Removes all emojis
        allData[date] = Tweet.Tweet(text, getMentions(text), getHashTags(text))
    return allData
def getTweets(username):
    """Fetch the user's recent timeline and map created_at -> Tweet.Tweet.

    Emojis are stripped from the text; entries that fail cleaning or
    construction are skipped (best-effort, as in the original).
    """
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # Dropped the unused `auth_api = API(auth)` local.
    api = tweepy.API(auth)
    # Gets the past 20 previous tweets
    new_tweets = api.user_timeline(id=username)
    tweetsCSV = [tweet.text for tweet in new_tweets]
    tweets = []
    for x in tweetsCSV:
        try:
            tweets.append(emojiPattern.remove_emoji(x))
        except Exception:  # best-effort: skip tweets we cannot clean
            continue
    dateCSV = [tweet.created_at for tweet in new_tweets]
    allData = {}  # Key = Date, Value = tweet (Object from Tweet.py)
    for i in range(len(tweetsCSV)):
        try:
            text = str(tweetsCSV[i])
            # print() form is valid on both Python 2 and Python 3.
            print("Text: {} | Mentions: {} | Hashtags: {}".format(
                text, getMentions(text), getHashTags(text)))
            allData[dateCSV[i]] = Tweet.Tweet(text, getMentions(text),
                                              getHashTags(text))
        except Exception:  # skip malformed entries, keep going
            continue
    return allData
def load_tweets(self, path_dataset):
    """Read every entity file under *path_dataset* into self.__tweets.

    Each line is tab-separated: id, author, entity, lang, timestamp, corpus.
    """
    for entity in os.listdir(path_dataset):
        entity_path = path_dataset + "/" + entity
        parsed = []
        with open(entity_path, 'r') as handle:
            for line in handle:
                fields = line.split('\t')
                parsed.append(tw.Tweet(fields[0],   # id
                                       fields[1],   # author
                                       fields[2],   # entity code
                                       fields[3],   # lang
                                       fields[4],   # timestamp
                                       fields[5]))  # corpus
        self.__tweets[entity] = parsed
def getTweets(api):
    """Collect unique Tweet objects across 180 paged API requests."""
    collected = set()
    for _ in range(180):
        batch = makeTwitterRequest(api)
        # Wrap each raw payload in a Tweet object, deduplicating via the set.
        collected.update(Tweet.Tweet(raw) for raw in batch)
    return collected
def make_tweet(mytweets):
    """Prompt for an author and message, enforce the 140-char limit, and save.

    The new tweet is inserted at the front of *mytweets* and the list is
    persisted via save_tweets().
    """
    # Get user name
    author = input('What is your name? ')
    # Get tweet
    text = input('What would you like to tweet? ')
    # Re-prompt until the tweet fits in 140 characters.
    while len(text) > 140:
        print('Tweets can only be 140 characters')
        print()
        # Fixed: the original line ended with a stray "1" (a syntax error).
        text = input('What would you like to tweet? ')
    # only save file if text is less than 140 characters
    # to prevent records with empty tweets
    if len(text) <= 140:
        # Create a tweet object named tweet_entry
        tweet_entry = Tweet.Tweet(author, text)
        # Add tweet_entry to beginning of mytweets list (newest first)
        mytweets.insert(0, tweet_entry)
        # Save the mytweets list to a file
        save_tweets(mytweets)
        print(author, ', your tweet has been saved.', sep='')
def save_tweet(self, tweet_dict):
    """Save one tweet dict to ES in the index tweet-index-<YEAR>-<#WEEK>.

    Strips media/url fields, parses created_at into a datetime, tags the
    document with _id/_index, and logs the outcome of the save.
    """
    index_name = self.get_tweet_index()
    # Drop bulky nested keys we never index.
    for key in ('media', 'urls', 'url'):
        tweet_dict = self.purge_key_deep(tweet_dict, key)
    time_str = tweet_dict.get('created_at', '')
    if time_str:
        # Twitter timestamp format, e.g. "Wed Oct 10 20:19:24 +0000 2018"
        tweet_dict['created_at'] = datetime.datetime.strptime(
            time_str, "%a %b %d %H:%M:%S +0000 %Y")
    else:
        tweet_dict['created_at'] = None
    tweet_dict.update({u'_id': tweet_dict.get(u'id', "00000"),
                       u'_index': index_name})
    tweet = Tweet(**tweet_dict)
    res = tweet.save()
    self.tweet_per_follower_count += 1
    # Lazy %-style logging args: the message is rendered only if the
    # level is enabled, replacing eager string concatenation.
    if res:
        self.logger.info(
            " Success: tweet #%s for user_id: %s saved in index: %s tweet_id: %s",
            self.tweet_per_follower_count, tweet_dict['user']['id'],
            index_name, tweet_dict['id'])
    else:
        self.logger.error(
            " Failure: tweet #%s for user_id: %s not saved in index: %s tweet_id: %s",
            self.tweet_per_follower_count, tweet_dict['user']['id'],
            index_name, tweet_dict['id'])
def initWithFilteredData(self, data):
    """Populate id/name and the tweets map from an already-filtered payload."""
    self.id = data['id']
    self.name = data['name']
    for tweetID in data['tweets']:
        # Bind the raw entry once instead of re-indexing the dict repeatedly.
        raw = data['tweets'][str(tweetID)]
        tweetObj = Tweet.Tweet(raw)
        self.tweets[raw['id']] = tweetObj
        self.tweets_count += 1
        self.urls_count += tweetObj.getAttr('urls_count')
def test_save_and_load_tweet(self):
    """Round-trip a single tweet through TweetDA and compare."""
    usr = TwitterUser('@POTUS', 'white house',
                      'president of the united states',
                      'June 2009', '14k', '120')
    original = Tweet('Donald Trump', usr.username, usr.user_id,
                     '2020-02-01T06:33:23.000Z', 'Rocketman!',
                     120, 3929, '', 300, 'image url', 'URL')
    data_access = TweetDA()
    data_access.save_tweet(original)
    reloaded = data_access.load_tweet(original.tweet_id)
    self.assertEqual(original, reloaded)
def getNextTweet(self):
    """Read one JSON line from self.f and parse it into a Tweet.

    Returns (tweet, line) on success, None on EOF or a JSON parse error,
    and the string "Err" when an expected key is missing. Also updates
    the running counters self.tweets / self.GPStweets / self.problems.

    NOTE(review): written for Python 2 (print statements).
    """
    tweet = Tweet()
    line = self.f.readline()
    if line == "":
        # EOF: report the total, close the file, and signal end-of-stream.
        # print "Missing line:"
        print self.tweets
        self.f.close()
        return None
    try:
        o = json.loads(line)
        #print o['doc']
        #o = json.loads(o['doc'])
    except json.decoder.JSONDecodeError as e:
        # Count the bad record and skip it rather than aborting the scan.
        print "Problematic JSON string:"
        print line
        print e.args
        self.problems += 1
        return None
    # Extract GPS: try 'geo' tag, fallback to 'location' tag
    try:
        if o['doc']['geo'] != None:
            (tweet.lat, tweet.lon) = o['doc']['geo']['coordinates']
            self.GPStweets += 1
        else:
            try:
                # Fall back to scraping lat/lon out of the free-text location.
                tweet.location = o['doc']['location']
                match = self.rexLatLon.search(tweet.location)
                if bool(match):
                    self.GPStweets += 1
                    (tweet.lat, tweet.lon) = float(match.group('lat')), float(match.group('lon'))
            except KeyError:
                # Record has no 'location' key at all; leave coords unset.
                print "Location Key Error"
                pass
                #raise
        self.tweets += 1
        if self.tweets%100000 == 0:
            # Progress marker for long scans.
            print "Tweets so far: " + str(self.tweets)
        #tweet.WRONGuserID = o['from_user_id']
        tweet.userName = o['doc']['from_user']
        tweet.text = o['doc']['text'].encode("utf-8")
        tweet.createdAt = o['doc']['created_at']
        tweet.profile_image = o['doc']['profile_image_url']
        tweet.msgID = int(o['doc']['id'])
        #tweet.sentiment = float(o['doc']['sentiment'])
        #tweet.json = line.strip()
        # Twitter legacy timestamp, e.g. "Tue, 05 Apr 2011 18:47:20 +0000"
        tweet.datetime = datetime.strptime(tweet.createdAt, "%a, %d %b %Y %H:%M:%S +0000")
        return (tweet, line)
    except KeyError:
        # A required field was missing; report and return the error marker.
        print "KeyError:"
        print line  # o
        print "TWEETS"
        print self.tweets
        return "Err"
def initWithRawData(self, data):
    """Populate id/name from the header entry and tweets from index 3 onward."""
    header = data[0]
    self.id = header['id']
    self.name = header['name']
    # Entries 0-2 are metadata; tweet payloads start at index 3.
    for i in range(3, len(data)):
        tweetObj = Tweet.Tweet(data[i])
        self.tweets[data[i]['id']] = tweetObj
        self.tweets_count += 1
        self.urls_count += tweetObj.getAttr('urls_count')
def selectAllNoEmoji(self):
    """Return every tokenized tweet whose classe is 'NoEmoji'."""
    cur = self.__conn.cursor()
    cur.execute(""" SELECT id, tokens, original, classe, emojis FROM tweets WHERE tokens != '' and classe='NoEmoji' """)
    rows = cur.fetchall()
    cur.close()
    # Build Tweet objects straight from the result rows.
    return [Tweet(row[0], row[1], row[2], row[3], row[4]) for row in rows]
def termometer():
    """Return, over a 175 interval, all hate occurrences and their type (img || txt).

    NOTE(review): `response` is built and printed but the handler returns
    jsonify('nudity') regardless — presumably a placeholder; confirm intent.
    """
    if request.method == 'GET':
        response = []
        for row in getTweets():
            parsed = Tweet(row[0], row[1], row[2], row[3], row[4])
            response.append(parsed)
            print(parsed)
        return jsonify('nudity')
def _parse_csv(self, fname):
    """Convert a scraped CSV file into a TweetList named 'output'."""
    frame = pd.read_csv(fname)
    result = TweetList('output')
    # One Tweet per CSV row; user_id comes from the owning user object.
    for _, row in frame.iterrows():
        result.add_tweet(Tweet(row['UserScreenName'], row['UserName'],
                               self.user.user_id, row['Timestamp'],
                               row['Text'], row['Likes'], row['Retweets'],
                               row['Emojis'], row['Comments'],
                               row['Image link'], row['Tweet URL']))
    return result
def test4(training_data, folds: int, portionOfDataSet: float, test_ratio: float):
    """Build Tweet objects from a portion of the training DataFrame.

    This is the "more tokens" variant; only the first
    len(training_data) * portionOfDataSet rows are parsed.
    """
    X = []  # training data
    y = []  # Class labels. Disaster or not disaster
    Tweets = []
    print("Using {} % of data set...".format(portionOfDataSet * 100))
    print("Parsing text...")
    for i in range(int(len(training_data.index) * portionOfDataSet)):
        # Fixed: the original passed the whole "statusid"/"ttext" columns
        # to every Tweet; index the i-th row's values instead.
        Tweets.append(
            Tweet.Tweet(training_data["statusid"].iloc[i],
                        training_data["ttext"].iloc[i]))
def get_tweets(self):
    """Build Tweet objects from each entry in the response's "data" list."""
    # TODO: decide whether to alter Tweet class
    return [
        Tweet.Tweet(id=item["id"], text=item["text"], author='hypefury')
        for item in self.response_json["data"]
    ]
def make_tweet():
    """Prompt for an author and a message, validate the 140-character
    limit, and append the resulting Tweet to the global tweet list."""
    author = input('What is your name? ')
    while True:
        message = input('What would you like to tweet? ')
        if len(message) <= 140:
            new_tweet = tw.Tweet(author, message)
            tweet_list.append(new_tweet)
            print(new_tweet.get_author() + ', your tweet has been saved.')
            return
        print('Tweets can only be 140 characters!')
def getNextTweet(self):
    """Read one JSON line from self.f into a Tweet and the raw dict.

    Returns (o, tweet, line) on success, or None on EOF / JSON error.
    Updates the running counters self.tweets / self.GPStweets /
    self.problems as a side effect.

    NOTE(review): written for Python 2 (print statements).
    """
    tweet = Tweet()
    line = self.f.readline()
    if line == "":
        # EOF: close the file and signal end-of-stream.
        self.f.close()
        return None
    try:
        o = json.loads(line)
    except json.decoder.JSONDecodeError:
        # Count the bad record and skip it rather than aborting the scan.
        print "Problematic JSON string:"
        print line
        self.problems += 1
        return None
    # Extract GPS: try 'geo' tag, fallback to 'location' tag
    # Default both keys so callers can rely on them being present.
    o.update({'lat': None, 'lon': None})
    if o['geo'] != None:
        self.GPStweets += 1
        (tweet.lat, tweet.lon) = o['geo']['coordinates']
        (o['lat'], o['lon']) = o['geo']['coordinates']
    else:
        try:
            # Fall back to scraping lat/lon out of the free-text location.
            tweet.location = o['location']
            match = self.rexLatLon.search(tweet.location)
            if bool(match):
                self.GPStweets += 1
                (tweet.lat, tweet.lon) = float(match.group('lat')), float(
                    match.group('lon'))
                (o['lat'], o['lon']) = (tweet.lat, tweet.lon)
        except KeyError:
            # Record has no 'location' key at all; leave coords as None.
            print "Location Key Error"
            pass
            #raise
    self.tweets += 1
    if self.tweets % 100000 == 0:
        # Progress marker for long scans.
        print "Tweets so far: " + str(self.tweets)
    #tweet.WRONGuserID = o['from_user_id']
    tweet.userName = o['from_user']
    tweet.text = o['text'].encode("utf-8")
    tweet.createdAt = o['created_at']
    tweet.profile_image = o['profile_image_url']
    tweet.msgID = int(o['id'])
    tweet.json = line.strip()
    return (o, tweet, line)
def write_tweets(self):
    """Search Twitter for self.stock_query, dedupe by tweet id, and
    append each unique tweet's processed fields to a CSV file.

    The CSV path is derived from the query and the first tweet's
    timestamp on first use, and the header row is written once.

    NOTE(review): written for Python 2 (print statements).
    """
    print "Getting tweets"
    # Drain the paged search cursor up to self.max_limit results.
    searched_tweets = [
        status for status in tweepy.Cursor(self.twitter_api.search, q=self.stock_query).items(
            self.max_limit)
    ]
    collected_tweets = {}
    for status in searched_tweets:
        tweet_id = status.id
        text = repr(status.text)
        retweet_count = status.retweet_count
        quotes_count = "--not implemented--"  # HOW to get this?
        favorites_count = status.favorite_count
        author = status.author
        is_news = "--not implemented--"  # DEFINE LIST OF VALID NEWS AUTHORS
        timestamp = status.created_at
        # Instantiate tweet object, which processes the info
        tweet = Tweet.Tweet(tweet_id=tweet_id, text=text,
                            retweet_count=retweet_count,
                            quotes_count=quotes_count,
                            favorites_count=favorites_count, author=author,
                            is_news=is_news, timestamp=timestamp)
        # Sets the path of csv file to first timestamp we gather
        if self.csv_path == "":
            self.csv_path = str(self.stock_query[1:]) + str(
                tweet.timestamp) + ".csv"
            print "Writing data to: " + self.csv_path
            tweet.writeHeaderToCsv(self.csv_path)
        # hashmap used to drop duplicate tweets (keyed by tweet id)
        if tweet.tweet_id not in collected_tweets.keys():
            collected_tweets[tweet.tweet_id] = tweet
    # Writes the processed info to the file at csv_path
    for tweet_id in collected_tweets.keys():
        collected_tweets[tweet_id].writeDataToCsv(self.csv_path)
def processTweetFile(self, jfile):
    """Parse a tweet file and render its tweets as markers on a fresh map."""
    parsed = [Tweet.Tweet(t) for t in io_geojson.processTweets(jfile)]
    # Fixed seed keeps the jittered marker positions reproducible.
    random.seed(1212)
    # create new map for new file
    self.map = folium.Map(location=phx_coords)
    self.map.zoom_start = 8
    for item in parsed:
        lat, lon = item.getRandPointInBoundingBox()
        folium.Marker([lat, lon], popup=item.twScreenName).add_to(self.map)
    self.map.save(self.mapFile)
    self.webView.setHtml(open(self.mapFile, 'r').read())
def transform_and_load(self, json_response, recreate_db):
    """Flatten a filtered-stream JSON response into a Tweet and load it."""
    # inspect response line (optional)
    print("json printed: ", json.dumps(json_response, indent=4, sort_keys=True))
    # dismantle the fields, binding the nested sections once
    data = json_response["data"]
    place = json_response["includes"]["places"][0]
    # construct tweet_data_dict
    tweet_data_dict = {
        'twitter_id': data["id"],
        'text': data["text"],
        'lang': data["lang"],
        'created_at': data["created_at"],
        'places_geo_place_id': place["id"],
        'places_geo_bbox': place["geo"]["bbox"],
        'places_full_name': place["full_name"],
        'places_place_type': place["place_type"],
        'places_country_code': place["country_code"],
        'stream_rule_tag': json_response["matching_rules"][0]["tag"],
    }
    # construct a Tweet() object
    # data passed in to Tweet() has to be in a dictionary format
    single_tweet = Tweet(**tweet_data_dict)
    # inspect transformed Tweet() object
    print("single_tweet: ", single_tweet)
    # load data
    self.start_load(single_tweet, recreate_db)
def append_next_set(max_tweet_id):
    """Fetch the next timeline page and append unseen tweets to ALL_TWEETS."""
    page = twitter.get_user_timeline(user_id=TWITTER_USER_ID, count=200,
                                     max_id=max_tweet_id, trim_user=True,
                                     exclude_replies=True, include_rts=False)
    for item in page:
        candidate = Tweet.Tweet(item["id"], item["text"], item["created_at"],
                                item["retweet_count"], item["favorite_count"])
        # max_id pages overlap at the boundary; skip a repeat of the last
        # tweet already collected.
        if not ALL_TWEETS or item["id"] != ALL_TWEETS[-1].tweet_id:
            ALL_TWEETS.append(candidate)
def read_data_from_database():
    """Load two sample rows (day 1) from news_2017_01 as tweet.Tweet objects."""
    connection = mysql.connector.connect(user="******", password="******",
                                         host="localhost",
                                         database="tweetermysql_2018-07-30")
    cursor = connection.cursor()
    cursor.execute(
        ' select ID, MetaData, Date, Section from news_2017_01 where day(Date) = "1" limit 2'
    )
    data = []
    # Row order is (ID, MetaData, Date, Section); Tweet takes
    # (id, section, metadata, parsed date).
    for row_id, m_data, date, section in cursor.fetchall():
        data.append(tweet.Tweet(row_id, section, m_data,
                                datetime.strptime(date, '%Y-%m-%d')))
    return data
def create_tweet(tweets):
    """Prompt for a name and a <=140-character message, then store the Tweet."""
    # Ask for the user's name
    name = input("\nWhat is your name? ")
    while True:
        # Ask for the user's message
        text = input("What would you like to tweet? ")
        if len(text) <= 140:
            break  # message fits, stop prompting
        print("\nTweets can only be 140 characters!\n")
    # Create a Tweet object and add it to the list
    tweets.append(Tweet.Tweet(name, text))
    # Print a confirmation that the Tweet has been made
    print(name, ", your tweet has been saved.", sep="")
def load_tweets(file_name):
    """Read a tweet-per-record text file and return a list of Tweet objects.

    A record boundary is a line starting with a 6+ digit id (or ending with
    one); files whose name contains "haiti" treat every line as a record.
    Each Tweet carries its text plus (start, end) character offsets within
    the file and the source file name.
    """
    tweets = []
    current_start = 0
    text = ""
    tweet_done = False
    # Raw strings so the \s escapes are taken literally by re, not str.
    start_pattern = re.compile(r"^[0-9]{6,}[,\s]")
    end_pattern = re.compile(r"^.+[\s][0-9]{6,}$")
    # Fixed: the original read "for open(file_name) as fn:" (syntax error)
    # and then iterated the *name* string; open the file and read its lines.
    with open(file_name) as fn:
        for line in fn:
            if (start_pattern.match(line) or end_pattern.match(line)
                    or "haiti" in file_name.split("/")[-1]):
                tweet_done = True
            text += line
            if tweet_done:
                start = current_start
                end = start + len(text)
                current_start = end
                tweets.append(Tweet.Tweet(text.replace("\n", " ").strip(),
                                          start, end, file_name))
                tweet_done = False
                text = ""
    # Fixed: the original accumulated `tweets` but never returned it.
    return tweets
def on_status(self, status):
    """Tweepy stream callback: process one tweet and append it to the CSV.

    Accepts tweets until self.TWEET_LIMIT is reached (0 = unlimited); once
    reached, returns False to disconnect the stream. The CSV path is derived
    from the first accepted tweet's timestamp, and the header is written once.

    NOTE(review): written for Python 2 (print statements).
    """
    print "Got a tweet..."
    if self.TWEET_LIMIT == 0 or self.tweet_count < self.TWEET_LIMIT:
        #Can add a condition for saving a tweet
        print "Accepted!"
        tweet_id = status.id
        text = status.text.encode("UTF-8")
        retweet_count = status.retweet_count
        quotes_count = "--not implemented--"  # HOW to get this?
        favorites_count = status.favorite_count
        author = status.author
        is_news = "--not implemented--"  # DEFINE LIST OF VALID NEWS AUTHORS
        timestamp = status.created_at
        #Instantiate tweet object, which processes the info
        tweet = Tweet.Tweet(tweet_id=tweet_id, text=text,
                            retweet_count=retweet_count,
                            quotes_count=quotes_count,
                            favorites_count=favorites_count,
                            author=author, is_news=is_news,
                            timestamp = timestamp)
        #Sets the path of csv file to first timestamp we gather
        if self.csv_path == '':
            self.csv_path = str(tweet.timestamp) + '.csv'
            print "Writing data to: "+self.csv_path
            tweet.writeHeaderToCsv(self.csv_path)
        #Writes the processed info to the file at csv_path
        tweet.writeDataToCsv(self.csv_path)
        self.tweet_count += 1
    else:
        # Limit reached: returning False tells tweepy to disconnect.
        print "Rejected. Disconnecting..."
        print "Data written to file: "+self.csv_path
        return False
def populate_tweets(self, tweets, convert_to_tweet=True, **kwargs):
    """
    Populates tweets from a list of python dictionary objects. Use
    populate_tweets_from_api to get it from the API.

    Side effects: appends to self.tweets; updates earliest/latest tweet
    times and the mention/reply/retweet/token/hashtag counters; finally
    refreshes the user info from the last tweet in the list.

    :param tweets: list of python dictionaries (i.e. from json.loads()),
        or already-constructed Tweet objects when convert_to_tweet=False
    :param convert_to_tweet: wrap each dict in Tweet.Tweet when True
    :param kwargs: keywords to pass to Tweet constructor
    :return: None

    NOTE(review): uses the Python-2-only `unicode` builtin.
    """
    if len(tweets) == 0:
        return
    i = 0
    # Initialize the time window sentinels on first use so any real
    # timestamp will replace them.
    if self.earliest_tweet_time is None:
        self.earliest_tweet_time = datetime.datetime.max
        self.latest_tweet_time = datetime.datetime.min
    for t in tweets:
        if convert_to_tweet:
            tweet = Tweet.Tweet(t, noise_tokens=self.stopwords, **kwargs)
        else:
            tweet = t
        self.tweets.append(tweet)
        # Only compare timestamps that were actually parsed into datetimes.
        if tweet.created_at and type(tweet.created_at) is not str and type(tweet.created_at) is not unicode:
            if tweet.created_at < self.earliest_tweet_time:
                self.earliest_tweet_time = tweet.created_at
            if tweet.created_at > self.latest_tweet_time:
                self.latest_tweet_time = tweet.created_at
        ##mentions
        for mention in tweet.mentions:
            self.mentioned[mention] += 1
        for mention_sn in tweet.mentions_sns:
            self.mentioned_sns[mention_sn] += 1
        ##replies
        if tweet.reply_to is not None:
            self.replied_to[tweet.reply_to] += 1
        if tweet.reply_to_sn is not None:
            self.replied_to_sns[tweet.reply_to_sn] += 1
        ##retweets
        if tweet.retweeted is not None:
            self.retweeted[tweet.retweeted] += 1
        if tweet.retweeted_sn is not None:
            self.retweeted_sns[tweet.retweeted_sn] += 1
        if tweet.retweeted_user_tweet_count > 0:
            # Remember this tweet's index for later retweet analysis.
            self.retweeted_tweets.append(i)
        ##tokens and HTs
        if tweet.tokens:
            for term in tweet.tokens:
                self.tokens[term] += 1
        for ht in tweet.hashtags:
            self.hashtags[ht] += 1
        i += 1
    # INFO ABOUT USER
    # Take the user record from the last tweet: raw dict when converting,
    # otherwise the already-parsed user object.
    if convert_to_tweet:
        user_data = tweets[-1]['user']
        self.populate_user_data(user_data, do_parse_date=True)
    else:
        self.copy_user_data(tweets[-1].user)
import GetTemp, Tweet import time, datetime interval = 60 # in minute(s) get = GetTemp.GetTemp("/dev/tty.usbmodem14401", 115200) tweet = Tweet.Tweet() prevmin = 0 nextmin = 0 def makeContent(): values = get.getFromArduino() temp = values["temp"] humidity = values["humidity"] thi = int(get.calcTHI(temp, humidity)) feeling = get.thiFeeling(thi) now = datetime.datetime.now() strftime = now.strftime("%Y年%m月%d日 %H:%M") content = strftime + "\n現在の室温: " + str(temp) + "°C\n現在の湿度: " + str( humidity) + "%\n不快指数: " + str(thi) + " (" + feeling + ")" return content def tweetIt(content): tweet.post(content) def main():