def __init__(self):
    """Set up per-tweet state, the ticker track list and the 'msllog' logger."""
    super(MyStreamListener, self).__init__()

    # Most recently received tweet and the language code compared against
    # the tweet's 'lang' field.
    self.__tweet = ''
    self.__languages = 'en'

    # Keep the Ticker helper itself as well as the list it reports.
    ticker_source = Ticker()
    self.__tickers = ticker_source
    self.__trackList = ticker_source.getTickers()

    # Configure the named logger first, then keep a handle to it.
    logger_name = 'msllog'
    self.setup_logger(logger_name, r'logs/msl.log')
    self.msllog = logging.getLogger(logger_name)
class MyStreamListener(StreamListener):
    """Stream listener that stores matching tweets in MongoDB.

    For every raw message received, the listener parses the JSON payload,
    keeps only tweets whose 'lang' equals the configured language ('en'),
    copies a fixed set of fields, attaches a per-word count dictionary under
    the 'rd' key and inserts the result into the 'twitter_collection'
    collection of the 'twitter_db' database on localhost:27017.
    """

    # Tweet fields copied verbatim into the stored document.
    _FIELDS_TO_EXTRACT = (
        'id', 'timestamp_ms', 'text', 'favorited', 'favorite_count',
        'retweeted', 'retweet_count', 'coordinates', 'geo',
    )

    def __init__(self):
        """Initialise per-tweet state, the ticker list and the 'msllog' logger."""
        super(MyStreamListener, self).__init__()
        self.__tweet = ''
        # Initialised here so buildRecordDict() is safe before the first tweet.
        self.__tweetText = ''
        self.__languages = 'en'
        self.__tickers = Ticker()
        self.__trackList = self.__tickers.getTickers()
        # MongoDB collection handle; created on first use and then reused.
        self.__collection = None
        self.setup_logger('msllog', r'logs/msl.log')
        self.msllog = logging.getLogger('msllog')

    def _get_collection(self):
        """Return the target collection, creating the client only once.

        The client used to be rebuilt for every incoming tweet, which opened
        a fresh connection per message and never closed it.
        """
        if self.__collection is None:
            client = MongoClient('localhost', 27017)
            self.__collection = client['twitter_db']['twitter_collection']
        return self.__collection

    def on_data(self, data):
        """Handle one raw stream message.

        :param data: JSON text of a single stream message.
        :return: None. Any failure is logged to 'msllog' as a warning and
            swallowed, so one bad message does not propagate to the caller.
        """
        try:
            self.__tweet = json.loads(data)
            self.__tweetText = self.__tweet['text'].encode(
                'ascii', 'ignore').strip()

            # Skip other languages up front. Previously the 'rd' entry was
            # added unconditionally, so the emptiness check never fired and
            # non-matching tweets were stored as documents holding only 'rd'.
            if self.__tweet.get('lang') != self.__languages:
                return

            insertion = {}
            for field in self._FIELDS_TO_EXTRACT:
                if field in self.__tweet:
                    insertion[field] = self.__tweet[field]
            if not insertion:
                return

            # Word-count dictionary for this tweet.
            insertion['rd'] = self.buildRecordDict()

            print(insertion)
            self._get_collection().insert(insertion)
        except Exception as e:
            # Deliberately broad: this is the stream callback boundary.
            self.msllog.warning(e)

    def on_error(self, status):
        """Print the error status and pause for 60 seconds on status 420.

        :param status: status code reported by the stream.
        :return: None.
        """
        print(status)
        if status == 420:
            # Presumably the rate-limit response; back off before continuing.
            time.sleep(60)

    def buildRecordDict(self):
        """Count the words of the current tweet text.

        Each whitespace-separated token is lower-cased and stripped, and every
        '.' and '$' is removed from it (presumably because the words become
        MongoDB field names -- confirm). Tokens that end up empty are skipped.

        :return: dict mapping word -> number of occurrences, or None when
            counting fails (the failure is logged to 'msllog').
        """
        try:
            recordDict = {}
            for word in self.__tweetText.split():
                wrd = word.lower().strip().encode('utf-8')
                wrd = wrd.replace('.', '').replace('$', '')
                if not wrd:
                    # Token consisted only of '.'/'$'; nothing left to count.
                    continue
                recordDict[wrd] = recordDict.get(wrd, 0) + 1
            return recordDict
        except Exception as e:
            # Look the id up defensively so the handler itself cannot raise.
            tweet_id = None
            if isinstance(self.__tweet, dict):
                tweet_id = self.__tweet.get('id')
            msg = '{} {}'.format(tweet_id, e)
            self.msllog.warning(msg)
            return None

    def setup_logger(self, logger_name, log_file, level=logging.INFO):
        """Configure a named logger that writes to a file and to the console.

        :param logger_name: name passed to logging.getLogger.
        :param log_file: path of the log file; opened with mode 'w'.
        :param level: logging level to set on the logger.
        """
        l = logging.getLogger(logger_name)
        l.setLevel(level)

        # Named loggers are process-wide; attaching handlers again on a later
        # call would make every message appear once per call.
        if l.handlers:
            return

        formatter = logging.Formatter('%(asctime)s : %(message)s')

        fileHandler = logging.FileHandler(log_file, mode='w')
        fileHandler.setFormatter(formatter)

        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)

        l.addHandler(fileHandler)
        l.addHandler(streamHandler)
__author__ = 'kahlil' import tweepy from tweepy import Stream from classes.oauth import OAuth from classes.myStreamListener import MyStreamListener from classes.ticker import Ticker import logging logging.basicConfig(filename='logs/tc.log', level=logging.DEBUG, format='%(asctime)s %(message)s') try: t = Ticker() oa = OAuth() auth = tweepy.OAuthHandler(oa.getConsumerKey(), oa.getConsumerSecret()) auth.set_access_token(oa.getAccessToken(), oa.getAccessTokenSecret()) api = tweepy.API(auth) l = MyStreamListener() stream = Stream(auth, l) for chunk in t.chunkTickers(): stream.filter(track=chunk, async=True) except Exception as e: logging.warning(e)
def __init__(self, db):
    """Keep the database handle, load the ticker list and set up 'dplog'.

    :param db: database object stored on the instance as-is.
    """
    self.__db = db

    # Ticker list as reported by a freshly created Ticker helper.
    ticker_source = Ticker()
    self.__tickers = ticker_source.getTickers()

    # Starts out empty.
    self.__words = {}

    # Configure the named logger first, then keep a handle to it.
    logger_name = 'dplog'
    self.setup_logger(logger_name, r'logs/dp.log')
    self.dplog = logging.getLogger(logger_name)
class MyStreamListener(StreamListener):
    """Stream listener that stores matching tweets in MongoDB.

    For every raw message received, the listener parses the JSON payload,
    keeps only tweets whose 'lang' equals the configured language ('en'),
    copies a fixed set of fields, attaches a per-word count dictionary under
    the 'rd' key and inserts the result into the 'twitter_collection'
    collection of the 'twitter_db' database on localhost:27017.
    """

    # Tweet fields copied verbatim into the stored document.
    _FIELDS_TO_EXTRACT = (
        'id', 'timestamp_ms', 'text', 'favorited', 'favorite_count',
        'retweeted', 'retweet_count', 'coordinates', 'geo',
    )

    def __init__(self):
        """Initialise per-tweet state, the ticker list and the 'msllog' logger."""
        super(MyStreamListener, self).__init__()
        self.__tweet = ''
        # Initialised here so buildRecordDict() is safe before the first tweet.
        self.__tweetText = ''
        self.__languages = 'en'
        self.__tickers = Ticker()
        self.__trackList = self.__tickers.getTickers()
        # MongoDB collection handle; created on first use and then reused.
        self.__collection = None
        self.setup_logger('msllog', r'logs/msl.log')
        self.msllog = logging.getLogger('msllog')

    def _get_collection(self):
        """Return the target collection, creating the client only once.

        The client used to be rebuilt for every incoming tweet, which opened
        a fresh connection per message and never closed it.
        """
        if self.__collection is None:
            client = MongoClient('localhost', 27017)
            self.__collection = client['twitter_db']['twitter_collection']
        return self.__collection

    def on_data(self, data):
        """Handle one raw stream message.

        :param data: JSON text of a single stream message.
        :return: None. Any failure is logged to 'msllog' as a warning and
            swallowed, so one bad message does not propagate to the caller.
        """
        try:
            self.__tweet = json.loads(data)
            self.__tweetText = self.__tweet['text'].encode(
                'ascii', 'ignore').strip()

            # Skip other languages up front. Previously the 'rd' entry was
            # added unconditionally, so the emptiness check never fired and
            # non-matching tweets were stored as documents holding only 'rd'.
            if self.__tweet.get('lang') != self.__languages:
                return

            insertion = {}
            for field in self._FIELDS_TO_EXTRACT:
                if field in self.__tweet:
                    insertion[field] = self.__tweet[field]
            if not insertion:
                return

            # Word-count dictionary for this tweet.
            insertion['rd'] = self.buildRecordDict()

            print(insertion)
            self._get_collection().insert(insertion)
        except Exception as e:
            # Deliberately broad: this is the stream callback boundary.
            self.msllog.warning(e)

    def on_error(self, status):
        """Print the error status and pause for 60 seconds on status 420.

        :param status: status code reported by the stream.
        :return: None.
        """
        print(status)
        if status == 420:
            # Presumably the rate-limit response; back off before continuing.
            time.sleep(60)

    def buildRecordDict(self):
        """Count the words of the current tweet text.

        Each whitespace-separated token is lower-cased and stripped, and every
        '.' and '$' is removed from it (presumably because the words become
        MongoDB field names -- confirm). Tokens that end up empty are skipped.

        :return: dict mapping word -> number of occurrences, or None when
            counting fails (the failure is logged to 'msllog').
        """
        try:
            recordDict = {}
            for word in self.__tweetText.split():
                wrd = word.lower().strip().encode('utf-8')
                wrd = wrd.replace('.', '').replace('$', '')
                if not wrd:
                    # Token consisted only of '.'/'$'; nothing left to count.
                    continue
                recordDict[wrd] = recordDict.get(wrd, 0) + 1
            return recordDict
        except Exception as e:
            # Look the id up defensively so the handler itself cannot raise.
            tweet_id = None
            if isinstance(self.__tweet, dict):
                tweet_id = self.__tweet.get('id')
            msg = '{} {}'.format(tweet_id, e)
            self.msllog.warning(msg)
            return None

    def setup_logger(self, logger_name, log_file, level=logging.INFO):
        """Configure a named logger that writes to a file and to the console.

        :param logger_name: name passed to logging.getLogger.
        :param log_file: path of the log file; opened with mode 'w'.
        :param level: logging level to set on the logger.
        """
        l = logging.getLogger(logger_name)
        l.setLevel(level)

        # Named loggers are process-wide; attaching handlers again on a later
        # call would make every message appear once per call.
        if l.handlers:
            return

        formatter = logging.Formatter('%(asctime)s : %(message)s')

        fileHandler = logging.FileHandler(log_file, mode='w')
        fileHandler.setFormatter(formatter)

        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)

        l.addHandler(fileHandler)
        l.addHandler(streamHandler)