def get_historical_twitters(place_id, since_time, until_time):
    """
    Harvest historical tweets for one place and store them in CouchDB.

    Runs a search for ``place:<place_id>`` restricted by ``since_time`` and
    ``until_time`` and passes the raw JSON of every result to ``db.twput``.
    When the module-level ``geo_only`` flag is truthy, only tweets whose
    ``'geo'`` field is not ``None`` are stored.

    Any ``tweepy.TweepError`` raised while iterating is reported as a rate
    limit hit: the function sleeps for 15 minutes and then carries on with
    the same cursor.  The loop ends when the cursor is exhausted.

    :param place_id: place identifier interpolated into the search query
    :param since_time: forwarded to the search as ``since``
    :param until_time: forwarded to the search as ``until``
    """
    api = API(
        _get_twitter_auth(api_access),
        wait_on_rate_limit=True,
        wait_on_rate_limit_notify=True,
    )
    db = cdb(_couchdb_get_url(), save_to_db)

    # A place id can be looked up with, for example:
    #   places = api.geo_search(query="AU", granularity="country")
    #   place_id = places[0].id
    query = "place:%s" % place_id
    tweets = tweepy.Cursor(
        api.search, q=query, since=since_time, until=until_time
    ).items()

    print("Start getting historical tweets ...")
    while True:
        try:
            payload = tweets.next()._json
            # Store everything unless geo_only is set, in which case the
            # tweet must carry geo information.
            if not geo_only or payload['geo'] is not None:
                db.twput(payload)
        except tweepy.TweepError:
            print("Rate limit reached. Sleeping for: 60 * 15")
            time.sleep(60 * 15)
        except StopIteration:
            print("Finished!")
            break
def load_city_education(self):
    """
    Fetch the education document for the current city.

    The document key is ``"<city in lower case>_education"`` and it is read
    from the ``"aurin"`` database on ``self.serverURL``.

    :return: whatever ``getByKey`` returns for that key, or ``None`` when
             ``self.city`` is empty/unset
    """
    if not self.city:
        return None

    doc_id = "{}_education".format(self.city.lower())
    aurin = db_util.cdb(self.serverURL, "aurin")
    return aurin.getByKey(doc_id)
def load_analysis(self):
    """
    Fetch the stored analysis result for the current city.

    The document key is ``"<city in lower case>_analysis_result"`` and it is
    read from the database named by ``analysis_result_db``.

    :return: whatever ``getByKey`` returns for that key, or ``None`` when
             ``self.city`` is empty/unset
    """
    if not self.city:
        return None

    results = db_util.cdb(self.serverURL, analysis_result_db)
    doc_id = "{}_analysis_result".format(self.city.lower())
    return results.getByKey(doc_id)
def load_city_suburb_coordinates(self):
    """
    Fetch the suburbs document for the current city.

    The document key is ``"<city in lower case>_suburbs"`` and it is read
    from the ``"aurin"`` database on ``self.serverURL``.

    :return: whatever ``getByKey`` returns for that key, or ``None`` when
             ``self.city`` is empty/unset
    """
    if not self.city:
        return None

    doc_id = "{}_suburbs".format(self.city.lower())
    aurin = db_util.cdb(self.serverURL, "aurin")
    return aurin.getByKey(doc_id)
def save_analysis(self, analysis_result):
    """
    Save an analysis result to the database for the current city.

    The result is stored under the id ``"<city in lower case>_analysis_result"``
    in the database named by ``analysis_result_db``.  According to the
    original documentation of this method, an existing document with the
    same id is replaced.

    :param analysis_result: the document to store; passed to ``db.put`` as is
    :raises ValueError: if ``self.city`` is empty/unset.  The city is checked
        before any database handle is created, so a missing city can neither
        fail halfway through nor write a document with the meaningless id
        ``"_analysis_result"``.
    """
    if not self.city:
        raise ValueError("cannot save analysis result: no city is set")

    db = db_util.cdb(self.serverURL, analysis_result_db)
    analysis_city_id = "{}_analysis_result".format(self.city.lower())
    db.put(analysis_result, analysis_city_id)
def load_tweet_data(self):
    """
    Load every stored tweet for the current city.

    Opens the database named by ``tweet_db`` on ``self.serverURL`` and
    queries it with ``getByCity(self.city)``.

    :return: the result of ``getByCity`` (described as JSON objects by the
             original documentation)
    """
    tweets = db_util.cdb(self.serverURL, tweet_db)
    return tweets.getByCity(self.city)
def load_period_tweet_data(self, start_ts, end_ts):
    """
    Load the current city's tweets between two timestamps.

    Opens the database named by ``tweet_db`` on ``self.serverURL`` and
    delegates to ``getByBlock``.

    :param start_ts: forwarded to ``getByBlock`` as ``start_ts``
    :param end_ts: forwarded to ``getByBlock`` as ``end_ts``
    :return: the result of ``getByBlock`` (described as JSON objects by the
             original documentation)
    """
    tweets = db_util.cdb(self.serverURL, tweet_db)
    return tweets.getByBlock(
        start_ts=start_ts,
        end_ts=end_ts,
        cityname=self.city,
    )
def get_twitters_by_userIDs():
    """
    Harvest tweets for every user listed in ``userId_search_record.json``.

    Each non-blank line of the input file is parsed as a JSON object mapping
    a user id to a start id.  For every entry ``get_all_tweets`` is called
    with the API client, both database handles, the user id and the start
    id; the ``(user id, end id)`` pair it returns is collected into a new
    mapping.  After a line has been processed, its new mapping is appended
    as one JSON line to ``userId_search_record2.json``.

    Blank lines in the input file (for example a trailing empty line) are
    skipped instead of aborting the run with a JSON decode error.
    """
    server_url = _couchdb_get_url()
    db1 = cdb(server_url, save_to_db1)
    db2 = cdb(server_url, save_to_db2)

    auth = get_twitter_auth()
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    # grab tweets by each userID
    with open("userId_search_record.json", 'r') as records_in:
        for line in records_in:
            if not line.strip():
                continue

            search_record = json.loads(line)
            new_search_record = {}
            for user_id, start_id in search_record.items():
                returned_id, end_id = get_all_tweets(api, db1, db2, user_id, start_id)
                new_search_record[returned_id] = end_id

            # Use a separate name for the output handle so the input file
            # being iterated is never shadowed.
            with open("userId_search_record2.json", 'a') as records_out:
                json.dump(new_search_record, records_out)
                records_out.write("\n")
def on_data(self, raw_data):
    """
    Handle one raw message from the stream and store it in CouchDB.

    The message is decoded from JSON and passed to ``db.twput``.  When the
    module-level ``geo_only`` flag is truthy, only messages whose ``'geo'``
    value is present and not ``None`` are stored; messages without a
    ``'geo'`` key are skipped quietly rather than treated as an error.

    Errors raised while filtering/storing are printed and followed by a
    5 second pause.  Only ``Exception`` subclasses are handled this way, so
    ``KeyboardInterrupt`` and ``SystemExit`` still propagate and the
    listener can be stopped.

    :param raw_data: JSON text of a single stream message
    :return: always ``True``
    """
    server_url = _couchdb_get_url()
    db = cdb(server_url, save_to_db)
    raw_data = json.loads(raw_data)
    try:
        # Store everything unless geo_only is set, in which case the
        # message must carry geo information.
        if not geo_only or raw_data.get('geo') is not None:
            db.twput(raw_data)
        return True
    except Exception as e:
        print("Error on_data:%s" % str(e))
        time.sleep(5)
    return True
"""
Sample usage of db_util.py.

The Unimelb VPN is required to run this code.

The CouchDB UI can be accessed through:
    http://172.26.130.149:5984/_utils/
    username/password: admin/admin1234

To access the CouchDB instance, download couchDB.pem from Slack and run:
    ssh -i couchDB.pem [email protected]

NOTE(review): the login above is written here in plain text; consider moving
it out of the source file.  The user@host part of the ssh command appears to
have been replaced by an e-mail-protection placeholder -- TODO restore it.
"""
from couchDB import db_util
import json
import argparse

# Base URL of the CouchDB server; the user:password part is masked here.
serverURL = "http://*****:*****@172.26.130.149:5984/"

"""sample1, connect to server without specific database"""
# Connect to the server using only its URL (no database selected).
couchserver = db_util.cdb(serverURL)
# Create a database, if needed (left disabled):
##couchserver.createDB('sample')
# Show all databases on the server.
couchserver.showDBs()

"""sample2 connect to server with specific database > recommanded way """
# Connect to the server and select the "sample" database in one step.
db = db_util.cdb(serverURL, "sample")

# Sample data: a twitter document and, per the original note, normal
# documents (from AURIN).
# Twitter data needs to have the field [id_str].
twitterdata = json.loads(
    '{"id_str":"1252949121519906816","type":"twitter", "text":"I#newthingfortheday"}'
)
def load_aus_language(self):
    """
    Fetch the Australia-wide languages document.

    Reads the key ``"australia_languages"`` from the ``"aurin"`` database on
    ``self.serverURL``.

    :return: whatever ``getByKey`` returns for that key
    """
    doc_id = "australia_languages"
    aurin = db_util.cdb(self.serverURL, "aurin")
    return aurin.getByKey(doc_id)
def load_aus_demographics(self):
    """
    Fetch the Australia-wide demographics document.

    Reads the key ``"australia_demographics"`` from the ``"aurin"`` database
    on ``self.serverURL``.

    :return: whatever ``getByKey`` returns for that key
    """
    doc_id = "australia_demographics"
    aurin = db_util.cdb(self.serverURL, "aurin")
    return aurin.getByKey(doc_id)