def addAllToDB(conn, tweets, formatted):
    """Insert every tweet in *tweets*, together with its author, into the DB.

    Args:
        conn: Database connection handle, handed straight to the ``db`` helpers.
        tweets: Iterable of tweets. Either raw status dicts or, when
            *formatted* is truthy, dicts already shaped like the return
            value of ``formatTweet`` (``'user'`` and ``'tweet'`` keys).
        formatted: Truthy if *tweets* needs no conversion; otherwise each
            entry is first passed through ``formatTweet``.
    """
    if formatted:
        records = tweets
    else:
        # Convert the whole batch up front, before the first insert happens.
        records = [formatTweet(conn, raw) for raw in tweets]

    for record in records:
        # The author row is written before the tweet row.
        db.addUser(conn, record['user'])
        db.addTweetToDB(conn, record['tweet'])
def formatTweet(conn, tweet, utcOffsetHours=8):
    """Convert a raw Twitter status dict into the user/tweet records used by the DB layer.

    Side effect: if the status is a retweet and ``db.checkTweet`` reports
    that the original status is not stored yet, the original is formatted
    recursively and inserted (author first, then tweet) before returning.

    Args:
        conn: Database connection handle, passed through to the ``db`` helpers.
        tweet: Status dict. Must contain the keys ``'user'`` (with ``'id'``,
            ``'screen_name'``, ``'followers_count'``), ``'text'``,
            ``'id_str'``, ``'favorite_count'``, ``'retweet_count'``,
            ``'created_at'``, ``'lang'``, ``'retweeted'``, ``'favorited'``,
            ``'coordinates'`` and ``'in_reply_to_status_id_str'``;
            ``'retweeted_status'`` is optional.
        utcOffsetHours: Hours added to the UTC ``'created_at'`` timestamp
            before it is stored. Defaults to 8, the shift that used to be
            hard-coded here.

    Returns:
        dict: ``{"user": userDetails, "tweet": tweetDetails}``. The same
        ``userDetails`` dict is also reachable as
        ``tweetDetails['userDetails']``. ``tweetDetails['coordinates']`` is
        only present when the status carries coordinates.

    Raises:
        KeyError: If a required key is missing from *tweet*.
        ValueError: If ``'created_at'`` does not match
            ``"%a %b %d %H:%M:%S +0000 %Y"``.
    """
    author = tweet['user']
    userDetails = {
        'userID': author['id'],
        'screenName': author['screen_name'],
        # 'friends_count' (accounts this user follows) was read here before;
        # the follower count lives in 'followers_count'.
        'followerCount': author['followers_count'],
    }

    # 'created_at' looks like 'Sat Oct 24 23:22:20 +0000 2015'; the literal
    # +0000 in the pattern means only UTC timestamps are accepted.
    datePosted = datetime.datetime.strptime(
        tweet['created_at'], "%a %b %d %H:%M:%S +0000 %Y")
    datePosted = datePosted + datetime.timedelta(hours=utcOffsetHours)

    tweetDetails = {
        'userID': str(author['id']),
        'tweetText': tweet['text'],
        'tweetID': tweet['id_str'],
        'favCount': tweet['favorite_count'],
        'rtCount': tweet['retweet_count'],
        # DATETIME column format: YYYY-MM-DD HH:MI:SS
        'datePosted': datePosted.strftime("%Y-%m-%d %H:%M:%S"),
        'tweetLang': tweet['lang'],
        'retweeted': tweet['retweeted'],
        'favorited': tweet['favorited'],
        'userDetails': userDetails,
    }

    # The key is left out entirely (not set to None) for tweets without a location.
    coordinates = tweet['coordinates']
    if coordinates:
        tweetDetails['coordinates'] = coordinates

    # Retweets: make sure the original status exists in the DB and link to it.
    if 'retweeted_status' in tweet:
        original = tweet['retweeted_status']
        rtID = original['id_str']
        if not db.checkTweet(conn, rtID):
            formattedOriginal = formatTweet(conn, original, utcOffsetHours)
            db.addUser(conn, formattedOriginal['user'])
            db.addTweetToDB(conn, formattedOriginal['tweet'])
        # TODO: when the original is already stored, refresh its retweet and
        # favorite counts instead of leaving the stored values untouched.
        tweetDetails['rtSource'] = rtID
    else:
        tweetDetails['rtSource'] = None

    # Replies: the replied-to status is NOT looked up or fetched here, so a
    # placeholder is stored instead of the real status id.
    # TODO: check db.checkTweet for the target, fetch it from the API when
    # missing (with rate-limit handling), and store the real id.
    if tweet['in_reply_to_status_id_str']:
        tweetDetails['inReplyToStatusID'] = "uncheckedReplyTweet"
    else:
        tweetDetails['inReplyToStatusID'] = None

    return {"user": userDetails, "tweet": tweetDetails}