from random import sample
from time import sleep

# readFile, setHeaders, writeToFile, parseTweetIds, getTweetsFromId and
# getRateLimit are helpers defined elsewhere in this project.

def downloadAllTweets(start, end, tweetTargetSource):
    numRequests = 0
    tweetIdSource = 'data/tweets.csv'
    dataLines = readFile(tweetIdSource).splitlines()[start:end]
    reducedDataLines = sample(dataLines, 400000)  # Random sample of 400,000 rows (requires at least that many lines)
    fieldnames = ['id_str', 'created_at', 'coordinates', 'hashtags', 'text']
    setHeaders(tweetTargetSource, fieldnames)
    requestStartRow = 0
    requestEndRow = 99
    while requestEndRow <= len(reducedDataLines):
        # Each pass of the outer loop covers 60 requests * 100 rows = 6000 rows
        print "Parsing rows " + str(requestStartRow) + '-' + str(requestStartRow + 6000) + ' out of ' + str(len(reducedDataLines))
        print str(float(requestStartRow) / float(len(reducedDataLines)) * 100) + "% complete"
        # Inner while loop handles API rate-limit logic; statuses/lookup accepts
        # at most 100 ids per request, so rows are fetched 100 at a time
        while numRequests < 60:
            if getRateLimit('statuses')['resources']['statuses']['/statuses/lookup']['remaining'] == 0:
                break
            tweetIdList = parseTweetIds(reducedDataLines, requestStartRow, requestEndRow)
            tweetData = getTweetsFromId(tweetIdList)
            print "Request: " + str(numRequests)
            if tweetData is not None:
                writeToFile(tweetData, tweetTargetSource, fieldnames)
            requestStartRow += 100
            requestEndRow += 100
            numRequests += 1
        print "Rate Limit Exceeded. Waiting...\n"
        while getRateLimit('statuses')['resources']['statuses']['/statuses/lookup']['remaining'] == 0:
            sleep(120)  # Suspends execution until the rate-limit window refreshes
        numRequests = 0
    print "Done!"
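
# Example invocation (hypothetical values, kept as a comment so importing this
# module stays side-effect free): sample rows 0-499,999 of data/tweets.csv and
# hydrate them into data/raw_tweets.csv.
#
#   downloadAllTweets(0, 500000, 'data/raw_tweets.csv')
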
def saveTweetsToFile(tweetsAboutCountries, targetFileBasePath):
    fieldnames = ['id_str', 'created_at', 'coordinates', 'hashtags', 'text', 'distanceFromCountry']
    # Write one CSV per country, e.g. <targetFileBasePath>canada_tweets.csv
    for country in tweetsAboutCountries:
        countryFilePath = targetFileBasePath + country + '_tweets.csv'
        setHeaders(countryFilePath, fieldnames)
        writeToFile(tweetsAboutCountries[country], countryFilePath, fieldnames)
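
if __name__ == '__main__':
    # Illustrative driver, assuming the helper functions this module relies on
    # are available. The paths, row range, and the hand-built per-country dict
    # below are hypothetical stand-ins, not values from the original project.
    downloadAllTweets(0, 500000, 'data/raw_tweets.csv')
    exampleTweets = {
        'canada': [{'id_str': '1',
                    'created_at': 'Mon Jan 01 00:00:00 +0000 2018',
                    'coordinates': None,
                    'hashtags': '',
                    'text': 'example tweet',
                    'distanceFromCountry': 0.0}],
    }
    saveTweetsToFile(exampleTweets, 'data/')  # Writes data/canada_tweets.csv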