def connect_CouchDB():
    """Connect to the CouchDB server and make sure ``tweets_all`` exists.

    Settings come from ``core.config('couchdb')``. ``ip_address`` and
    ``port`` are always read; ``login`` and ``password`` are embedded in
    the connection URL only when both are present. The protocol is
    always ``http``.

    Returns:
        The ``couchdb.Server`` object built from the connection URL.

    Raises:
        Any exception raised while building the server object, opening
        the database or creating it is propagated to the caller.
    """
    settings = core.config('couchdb')
    database_name = "tweets_all"
    scheme = 'http'

    # Credential-free URL: this is the only form that is ever printed.
    url_safe = '{protocol}://{ip}:{port}/'.format(
        protocol=scheme,
        ip=settings['ip_address'],
        port=str(settings['port']),
    )

    # Switch to the authenticated form when both credentials are supplied.
    url = url_safe
    if ('login' in settings) and ('password' in settings):
        url = '{protocol}://{login}:{password}@{ip}:{port}/'.format(
            protocol=scheme,
            login=settings['login'],
            password=settings['password'],
            ip=settings['ip_address'],
            port=str(settings['port']),
        )

    # Attempt to connect to CouchDB server
    try:
        # Calling couchdb.Server will not throw an exception
        couch = couchdb.Server(url)
        print("Connected to CouchDB server at " + url_safe)
    except ConnectionRefusedError:
        print("No CouchDB server at " + url_safe)
        raise
    except couchdb.http.Unauthorized as err:
        print("Connection to CouchDB server refused: " + str(err))
        raise
    except Exception as err:
        print(
            "Failed to connect to CouchDB server at " + url_safe +
            ". An unexpected exception was raised: " + str(err)
        )
        raise

    # Open the database, creating it when the server reports it missing.
    # Every other failure simply propagates.
    try:
        couch[database_name]
        print("Connected to database: " + database_name)
    except couchdb.http.ResourceNotFound:
        couch.create(database_name)
        print("Creating new database: " + database_name)

    return couch
def __init__(self, bucket_name):
    """Open an S3 connection and select the named bucket.

    Settings come from ``core.config('nectar')``: ``ec2_access_key``,
    ``ec2_secret_key`` and the ``s3`` sub-section (``port``, ``host``,
    ``public_url``).

    Args:
        bucket_name: Name of the bucket to open; stored on
            ``self.bucket_name``.

    Sets ``self.connection``, ``self.bucket`` and ``self.public_url``.
    Exceptions from the connection or bucket calls propagate unchanged.
    """
    settings = core.config('nectar')
    self.bucket_name = bucket_name

    access_key = settings['ec2_access_key']
    secret_key = settings['ec2_secret_key']
    s3_settings = settings['s3']

    # NOTE(review): certificate validation is disabled although the
    # connection is marked secure - confirm this is intentional.
    self.connection = boto.s3.connection.S3Connection(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        port=int(s3_settings['port']),
        host=s3_settings['host'],
        is_secure=True,
        validate_certs=False,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )

    # Open the bucket
    # get_bucket() requires validate=False to work with python3
    self.bucket = self.connection.get_bucket(self.bucket_name,
                                             validate=False)

    # Save the URL for public access
    self.public_url = s3_settings['public_url']
def __init__(self):
    """Authenticate with Twitter and open the harvester's databases.

    OAuth settings come from ``core.config('twitter', 'OAuth')``
    (``consumer_key``, ``consumer_secret``, ``access_token`` and
    ``access_token_secret``).

    Sets ``self.api``, the five ``self.db_*`` handles, ``self.senti``,
    ``self.id_to_outlet`` and ``self.source_ext``, and calls
    ``self._update_since_ids()``.

    Raises:
        tweepy.TweepError: Re-raised after a message is printed when
            the credential check fails. Any other exception propagates
            unchanged.
    """
    oauth = core.config('twitter', 'OAuth')

    # Initialise Twitter communication
    handler = tweepy.OAuthHandler(oauth['consumer_key'],
                                  oauth['consumer_secret'])
    handler.set_access_token(oauth['access_token'],
                             oauth['access_token_secret'])

    try:
        self.api = tweepy.API(
            handler,
            wait_on_rate_limit=True,
            wait_on_rate_limit_notify=True,
        )
        # tweepy.API constructor does not seem to throw an exception for
        # OAuth failure. Use API.verify_credentials() to validate OAuth
        # instead
        credentials = self.api.verify_credentials()
        print("OAuth connection with Twitter established through user @"
              + credentials.screen_name + "\n")
    except tweepy.TweepError as oauth_error:
        print("OAuth connection with Twitter could not be established")
        raise oauth_error

    # Database handles, opened in the same order as before.
    self.db_tweets = db('tweets')
    self.db_tweets_urls = db('tweets_urls')
    self.db_tweets_archive = db('tweets_archive')
    self.db_outlets = db('outlets')
    self.db_articles = db('articles')

    self.senti = SentimentAnalyser()

    # Outlet lookup as returned by the outlets database.
    self.id_to_outlet = self.db_outlets.get_users()

    self._update_since_ids()

    # Empty provenance template.
    self.source_ext = {'api': [], 'wa': {}}
def load_api():
    '''
    Build a tweepy API object from the configured OAuth credentials.

    Credentials are read from ``core.config('twitter', 'OAuth')`` using
    the keys ``consumer_key``, ``consumer_secret``, ``access_token`` and
    ``access_secret``.

    returns: a ``tweepy.API`` object built from the OAuth handler
    '''
    # NOTE(review): the access secret is read from 'access_secret' here;
    # confirm the config section really uses that key name.
    oauth = core.config('twitter', 'OAuth')

    handler = OAuthHandler(oauth['consumer_key'], oauth['consumer_secret'])
    handler.set_access_token(oauth['access_token'], oauth['access_secret'])

    # load the twitter API via tweepy
    api = tweepy.API(handler)
    return api
def iterate_followers(self):
    """Iterate the follower timelines of this node's share of the users.

    The user list is downloaded from ``self.db_outlets.get_users()``.
    When both ``core.config('node')`` and ``core.config('processes')``
    are set, the list is split into ``processes`` contiguous chunks and
    only the chunk belonging to ``node`` is processed. ``node`` is
    treated as a zero-based index, as implied by ``node * chunk``.
    Otherwise every user is processed.

    The last node (``node == processes - 1``) also takes the users left
    over when the list does not divide evenly. Previously those trailing
    users were never processed by any node.

    A ``node`` outside ``0..processes-1`` now selects an empty slice
    instead of raising ``IndexError``.

    Raises:
        ZeroDivisionError: If ``processes`` is configured as 0.
        Exceptions from the database call or from
        ``iterate_follower_timelines`` propagate unchanged.
    """
    # Download list of users from the tweets database
    users = list(self.db_outlets.get_users())
    total = len(users)

    node = core.config('node')
    processes = core.config('processes')

    if node is not None and processes is not None:
        node = int(node)
        processes = int(processes)
        chunk = total // processes
        start = node * chunk
        # Give the remainder to the last node so no user is skipped.
        end = total if node == processes - 1 else start + chunk
    else:
        start, end = 0, total

    for user in users[start:end]:
        self.iterate_follower_timelines(user)
def connect(self):
    """Connect to the CouchDB server and open ``self.db_str``.

    Settings come from ``core.config('couchdb')``. ``ip_address`` and
    ``port`` are always read. ``https`` selects the https protocol only
    when it is exactly ``True``. ``login`` and ``password`` are embedded
    in the connection URL only when both are present.

    The database named ``self.db_str`` is opened, or created when the
    server reports it missing, and stored on ``self._db``. Finally
    ``self.update_views()`` is called.

    Raises:
        Any exception raised while contacting the server, opening the
        database or creating it is propagated to the caller.
    """
    settings = core.config('couchdb')

    # Construct a URL from the arguments
    use_https = ('https' in settings) and (settings['https'] is True)
    scheme = 'https' if use_https else 'http'

    # Credential-free URL: this is the only form that is ever printed.
    url_safe = '{protocol}://{ip}:{port}/'.format(
        protocol=scheme,
        ip=settings['ip_address'],
        port=str(settings['port']),
    )

    # Switch to the authenticated form when both credentials are supplied.
    url = url_safe
    if ('login' in settings) and ('password' in settings):
        url = '{protocol}://{login}:{password}@{ip}:{port}/'.format(
            protocol=scheme,
            login=settings['login'],
            password=settings['password'],
            ip=settings['ip_address'],
            port=str(settings['port']),
        )

    # Attempt to connect to CouchDB server
    try:
        # Calling couchdb.Server will not throw an exception
        couch = couchdb.Server(url)
        # Attempt to GET from the CouchDB server to test connection
        couch.version()
        print("Connected to CouchDB server at " + url_safe)
    except ConnectionRefusedError:
        print("No CouchDB server at " + url_safe)
        raise
    except couchdb.http.Unauthorized as err:
        print("Connection to CouchDB server refused: " + str(err))
        raise
    except Exception as err:
        print(
            "Failed to connect to CouchDB server at " + url_safe +
            ". An unexpected exception was raised: " + str(err)
        )
        raise

    # Attempt to connect to CouchDB database; every failure other than
    # a missing database simply propagates.
    try:
        self._db = couch[self.db_str]
        print("Connected to database: " + self.db_str)
    # The python-couchdb docs says that a PreconditionFailed
    # exception is raised when a DB isn't found. But in practice it
    # throws a ResourceNotFound exception (CouchDB == 1.0.1)
    except couchdb.http.ResourceNotFound:
        self._db = couch.create(self.db_str)
        print("Creating new database: " + self.db_str)

    # Update the design document
    self.update_views()
def store_tweet(self, tweet_status, source=None):
    """Analyses and stores a tweet in the database.

    This method annotates the tweet with outlet, reply and mention
    information, a UNIX timestamp, sentiment analysis and (when
    coordinates are present) geocoding before storing it.

    The tweet goes to the URLs database when ``tweet['wa']`` has a
    ``url`` key. It also goes to either the archive database or the
    main tweets database, depending on whether it is older than
    ``core.config('twitter', 'days')`` days.

    Args:
        tweet_status (tweepy.Status): Status whose ``_json`` payload is
            stored.
        source (dict): Provenance data to store in the tweet. Defaults
            to ``{'api': [], 'wa': {}}``. It is deep-copied, so the
            caller's dict is never modified.

    Returns:
        The response of the archive or main database ``store_tweet``
        call.

    Raises:
        KeyError: If ``tweet['wa']['time']`` could not be set, e.g.
            because the timestamp conversion failed, or if expected
            tweet fields such as ``text`` are missing.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    # Convert the tweepy Status object into a plain dict via JSON.
    tweet = json.loads(json.dumps(tweet_status._json))
    print("Processing tweet: " + tweet['id_str'])

    # Add source to tweet
    if source is None:
        source = {'api': [], 'wa': {}}
    # A shallow copy would share source['wa'] and source['api'] with the
    # tweet. Everything written to tweet['wa'] below would then leak
    # into the caller's dict and into every later tweet built from it.
    tweet.update(copy.deepcopy(source))

    # If this tweet is from one of our outlets, store the outlet
    try:
        author_id = tweet['user']['id_str']
        if author_id in self.id_to_outlet:
            tweet['wa']['outlet'] = self.id_to_outlet[author_id]
    except Exception:
        print("Warning: An unexpected Exception was raised in " +
              "checking for and assigning id_to_outlet.")

    # If this tweet is replying to one of our outlets, store the outlet
    try:
        reply_id = tweet['in_reply_to_user_id_str']
        if reply_id in self.id_to_outlet:
            tweet['wa']['reply_to'] = self.id_to_outlet[reply_id]
    except KeyError:
        pass
    except Exception:
        print("Warning: An unexpected Exception was raised in " +
              "checking for and assigning id_to_outlet.")

    # If this tweet mentions one of our outlets, store the outlet(s).
    # Best effort: any failure leaves the mentions gathered so far.
    try:
        if tweet['entities']['user_mentions']:
            for mention in tweet['entities']['user_mentions']:
                mention_id = mention['id_str']
                if mention_id is not None and \
                        mention_id in self.id_to_outlet:
                    tweet['wa'].setdefault('mentions', []).append(
                        self.id_to_outlet[mention_id])
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        pass

    # Convert the creation time to a UNIX timestamp
    try:
        tweet['wa']['time'] = core.get_time(tweet['created_at'])
    except Exception as e:
        print("Warning: An unexpected Exception was raised in " +
              "converting created_at to a UNIX timestamp.")
        print(str(e))

    # Sentiment analysis
    tweet.update(self.senti.analyse(tweet['text']))

    # Geocoding
    # NOTE(review): the guard checks 'coordinates' but the value passed
    # on comes from 'geo' - confirm which field get_geocode() expects.
    if tweet['coordinates'] is not None:
        try:
            tweet.update(self.get_geocode(tweet['geo']['coordinates']))
        except Exception:
            print("Warning: geocode() failed for tweet " +
                  tweet['id_str'])

    # If related to an article, store a duplicate of the tweet in
    # the URLs database
    if 'url' in tweet['wa']:
        response = self.db_tweets_urls.store_tweet(tweet)
        if response is not None:
            print("Stored tweet: " + tweet['id_str'] +
                  " in URLs database.")

    # If the tweet is older than the configured number of days, store it
    # in the archive; otherwise store it in the main database.
    oldest_time = int(time.time() -
                      60 * 60 * 24 * core.config('twitter', 'days'))
    if int(tweet['wa']['time']) < oldest_time:
        response = self.db_tweets_archive.store_tweet(tweet)
        if response is not None:
            print("Stored tweet " + tweet['id_str'] +
                  " in archive database.")
    else:
        response = self.db_tweets.store_tweet(tweet)
        if response is not None:
            print("Stored tweet " + tweet['id_str'] + " in database.")

    return response
pass except: pass class TweetStreamListener(tweepy.StreamListener): def on_status(self, status): print(status.text) def stream_tweets(self): tweetStreamListener = self.TweetStreamListener() melbourneStream = tweepy.Stream(auth=self.api.auth, listener=tweetStreamListener) melbourneStream.filter( locations=[144.4441, -38.5030, 145.8176, -37.4018], async=True) th = TweetHarvester() while False: th.iterate_timelines() oldest_time = str( int(time.time() - 60 * 60 * 24 * core.config('twitter', 'days'))) th.iterate_articles(oldest_time) while False: th.iterate_replies() while False: th.iterate_retweets() while False: th.iterate_articles() #tweets = th.db_tweets.get_topic_tweets('rugby')
#!/usr/bin/python3
"""Smoke test: load the config and construct the storage classes.

Adds the sibling ``harvester`` directory to ``sys.path``, reads the
configuration, then constructs an ``ObjectStore`` and a ``CouchDBComms``
instance. Any exception raised along the way propagates and ends the
script.
"""

import sys
import os

# The project modules live in the 'harvester' directory next to the
# directory this script is run from.
harvester_dir = os.path.join(os.path.dirname(sys.path[0]), 'harvester')
sys.path.append(harvester_dir)

import core
from comms.couchdb import CouchDBComms
from comms.nectar import ObjectStore

# Check that the config file is complete
args = core.config()

# couchdb
# Required args: ip_address, port
# Optional args: https, login, password

obj_store = ObjectStore('wa-opengraph')

print(":CouchDBComms: Attempting to construct class")
db = CouchDBComms('articles2')
print("Successfully created class")

#db.store_article({'url': 'http://www.google.com'})
print a message as a error strErr: message to print or user input errType: if equals 'nr' the it prints the the user input specified above is not recognized no returns ''' echo('-!- : ') if(errType == "nr"): #not recognized error echo(strErr + " is not recognized\n\n") elif(errType == ""): echo(strErr + "\n\n") # check if data folder exists, if not create it if not os.path.isdir('data'): os.mkdir('data') # load the config file conf = core.config('data/config.dat') # initialize the progress variable <core.study.level class> progress = None # main loop while(exitCode == 0): initCmd() # save the config file if everything goes normally conf.save() echo("\nexited with code " + str(exitCode))
def config(path):
    ''' Edit the config.

    Passes ``path`` straight to ``core.config``. The result of that call
    is discarded, so this function always returns ``None``.

    path: value forwarded unchanged to ``core.config``
    '''
    core.config(path)