def get_historical_twitters(place_id, since_time, until_time):
    # Authenticate against Twitter; wait_on_rate_limit makes tweepy sleep
    # automatically when the API rate limit is hit.
    auth = _get_twitter_auth(api_access)
    api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    # Connect to the CouchDB database that the tweets are saved to.
    server_url = _couchdb_get_url()
    db = cdb(server_url, save_to_db)

    # Example of how a place_id can be obtained:
    # places = api.geo_search(query="AU", granularity="country")
    # place_id = places[0].id
    tweets = tweepy.Cursor(api.search, q="place:%s" % place_id,
                           since=since_time, until=until_time).items()
    print("Start getting historical tweets ...")
    while True:
        try:
            tweet = next(tweets)
            if geo_only:
                # Keep only tweets that carry an explicit geo coordinate.
                if tweet._json['geo'] is not None:
                    db.twput(tweet._json)
            else:
                db.twput(tweet._json)
        except tweepy.TweepError:
            # Back off for the standard 15-minute rate-limit window.
            print("Rate limit reached. Sleeping for 15 minutes ...")
            time.sleep(60 * 15)
            continue
        except StopIteration:
            print("Finished!")
            break
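A minimal usage sketch under the same assumptions as the function above (module-level api_access, save_to_db and geo_only are configured); the place id and date range below are illustrative, not taken from this file:

# Sketch only: the place id is illustrative; resolve a real one via
# api.geo_search as shown in the comments above.
get_historical_twitters(place_id="01864a8a64df9dc4",
                        since_time="2020-04-01",
                        until_time="2020-04-30")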
Example #2
def load_city_education(self):
    if self.city:
        city_key = "{}_education".format(self.city.lower())
        db = db_util.cdb(self.serverURL, "aurin")
        return db.getByKey(city_key)
    else:
        return None
Example #3
def load_analysis(self):
    if self.city:
        db = db_util.cdb(self.serverURL, analysis_result_db)
        city_key = "{}_analysis_result".format(self.city.lower())
        return db.getByKey(city_key)
    else:
        return None
Example #4
def load_city_suburb_coordinates(self):
    if self.city:
        city_key = "{}_suburbs".format(self.city.lower())
        db = db_util.cdb(self.serverURL, "aurin")
        return db.getByKey(city_key)
    else:
        return None
Example #5
def save_analysis(self, analysis_result):
    """
    Save analysis result to database. This will replace the document with the same id.
    """
    db = db_util.cdb(self.serverURL, analysis_result_db)
    analysis_city_id = "{}_analysis_result".format(self.city.lower())
    db.put(analysis_result, analysis_city_id)
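A brief sketch of the replace-on-same-id behaviour the docstring describes. The DataLoader class name, its constructor, and the payloads are assumptions for illustration only:

# Hypothetical wrapper class and payloads; only put()'s replace semantics
# are taken from the docstring above.
loader = DataLoader(serverURL="http://172.26.130.149:5984/", city="Melbourne")
loader.save_analysis({"sentiment": 0.42})  # creates melbourne_analysis_result
loader.save_analysis({"sentiment": 0.57})  # replaces that same document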
Example #6
def load_tweet_data(self):
    """
    Load all tweet data for the given city
    :return: JSON objects
    """
    db = db_util.cdb(self.serverURL, tweet_db)
    city_key = self.city
    return db.getByCity(city_key)
Example #7
def load_period_tweet_data(self, start_ts, end_ts):
    """
    Load tweet data between the given timestamps
    :return: JSON objects
    """
    db = db_util.cdb(self.serverURL, tweet_db)
    cityData = db.getByBlock(start_ts=start_ts,
                             end_ts=end_ts,
                             cityname=self.city)
    return cityData
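A usage sketch for the period query. The DataLoader name is again an assumption, as is the timestamp unit (Unix epoch seconds); the file itself does not state what start_ts/end_ts are measured in:

# Hypothetical usage; the epoch-seconds unit is an assumption.
loader = DataLoader(serverURL="http://172.26.130.149:5984/", city="Melbourne")
april_tweets = loader.load_period_tweet_data(start_ts=1585699200,  # 2020-04-01
                                             end_ts=1588291200)    # 2020-05-01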
Example #8
def get_twitters_by_userIDs():
    server_url = _couchdb_get_url()
    db1 = cdb(server_url, save_to_db1)
    db2 = cdb(server_url, save_to_db2)

    auth = get_twitter_auth()
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    # Grab tweets for each user id listed in the checkpoint file.
    with open("userId_search_record.json", 'r') as f:
        for line in f:
            userId_search_record = json.loads(line)
            new_userId_search_record = {}
            for user_id, startId in userId_search_record.items():
                userId, endId = get_all_tweets(api, db1, db2, user_id, startId)
                new_userId_search_record[userId] = endId
            # Checkpoint the new high-water marks once per input line, using a
            # separate handle so the input file object f is not shadowed.
            with open("userId_search_record2.json", 'a') as out_f:
                json.dump(new_userId_search_record, out_f)
                out_f.write("\n")
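The checkpoint files appear to be JSON Lines, each line mapping a user id to the last tweet id fetched for that user. A sketch of seeding such a file; the ids are illustrative and the format is inferred from the loop above:

import json

# Illustrative ids only.
seed = {"783214": "1252949121519906816",
        "6253282": "1252900000000000000"}
with open("userId_search_record.json", 'w') as f:
    json.dump(seed, f)
    f.write("\n")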
Example #9

def on_data(self, raw_data):
    # Called by tweepy for every raw message received on the stream.
    server_url = _couchdb_get_url()
    db = cdb(server_url, save_to_db)
    raw_data = json.loads(raw_data)
    try:
        if geo_only:
            # Keep only tweets that carry an explicit geo coordinate.
            if raw_data['geo'] is not None:
                db.twput(raw_data)
        else:
            db.twput(raw_data)
        return True

    except BaseException as e:
        print("Error on_data: %s" % str(e))
        time.sleep(5)
    return True
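on_data is the callback of a tweepy.StreamListener subclass. A minimal sketch of how such a listener is typically wired into a stream with tweepy 3.x; the class name and bounding box are illustrative, not taken from this file:

import tweepy

class TweetSaver(tweepy.StreamListener):
    # The on_data method shown above would live in this class.
    pass

auth = get_twitter_auth()          # same auth helper used elsewhere here
stream = tweepy.Stream(auth=auth, listener=TweetSaver())
# Illustrative bounding box roughly covering Australia (SW then NE corner).
stream.filter(locations=[112.0, -44.0, 154.0, -9.0])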
""" sample usage on db_util.py
Unimelb vpn required to run the code
Couchdb UI can be accessed through:   http://172.26.130.149:5984/_utils/
username/password: admin/admin1234

to access the CouchDB instance, download couchDB.pem from Slack and run:
ssh -i couchDB.pem [email protected]
"""
from couchDB import db_util
import json
import argparse

serverURL = "http://*****:*****@172.26.130.149:5984/"
"""sample1, connect to server without specific database"""
#connect to server with URL
couchserver = db_util.cdb(serverURL)

#create database, if needed
##couchserver.createDB('sample')
#show all databases on server
couchserver.showDBs()
"""sample2 connect to server with specific database
    > recommanded way
"""
db = db_util.cdb(serverURL, "sample")

# Sample data: Twitter documents and normal documents (from AURIN)
# Twitter data must contain the field [id_str]
twitterdata = json.loads(
    '{"id_str":"1252949121519906816","type":"twitter", "text":"I#newthingfortheday"}'
)
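The examples earlier in this file store tweet JSON with twput (keyed by id_str) and other documents with put(doc, key). A sketch of saving this sample document the same way; the AURIN document and its key are illustrative:

# twput/put usage mirrors the calls seen earlier in this file.
db.twput(twitterdata)                       # stored under its id_str

aurindata = {"type": "aurin", "note": "illustrative document"}
db.put(aurindata, "melbourne_education")    # key name is illustrative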
Example #11
def load_aus_language(self):
    key = "australia_languages"
    db = db_util.cdb(self.serverURL, "aurin")
    return db.getByKey(key)
Example #12
def load_aus_demographics(self):
    key = "australia_demographics"
    db = db_util.cdb(self.serverURL, "aurin")
    return db.getByKey(key)