def __init__(self):
     """Initialise listener state, the ticker source and the 'msllog' logger."""
     super(MyStreamListener, self).__init__()

     # Last tweet placeholder and the language code kept on the instance.
     self.__tweet, self.__languages = '', 'en'

     # Keep both the Ticker object and whatever its getTickers() returns.
     ticker_source = Ticker()
     self.__tickers = ticker_source
     self.__trackList = ticker_source.getTickers()

     # Configure the named logger first, then keep a handle to it.
     log_name = 'msllog'
     self.setup_logger(log_name, r'logs/msl.log')
     self.msllog = logging.getLogger(log_name)
 def __init__(self):
     """Prepare tweet state, ticker data and the 'msllog' logger."""
     super(MyStreamListener, self).__init__()

     # Defaults: no tweet seen yet, language code 'en'.
     self.__tweet, self.__languages = '', 'en'

     # The Ticker instance is stored alongside the result of getTickers().
     ticker_source = Ticker()
     self.__tickers = ticker_source
     self.__trackList = ticker_source.getTickers()

     # setup_logger() attaches the handlers; getLogger() returns that logger.
     log_name = 'msllog'
     self.setup_logger(log_name, r'logs/msl.log')
     self.msllog = logging.getLogger(log_name)
class MyStreamListener(StreamListener):
    """Stream listener that stores selected tweet fields in MongoDB.

    Every payload handed to ``on_data`` is parsed as JSON. When its ``lang``
    equals the configured language ('en'), the keys named in
    ``_FIELDS_TO_EXTRACT`` plus a per-word count dictionary (``rd``) are
    inserted into ``twitter_db.twitter_collection`` on localhost:27017.
    Problems are logged through the 'msllog' logger rather than raised.
    """

    # Tweet keys copied as-is into the stored document.
    _FIELDS_TO_EXTRACT = (
        'id', 'timestamp_ms', 'text', 'favorited', 'favorite_count',
        'retweeted', 'retweet_count', 'coordinates', 'geo',
    )

    def __init__(self):
        """Initialise tweet state, ticker data and the 'msllog' logger."""
        super(MyStreamListener, self).__init__()
        self.__tweet = ''
        self.__tweetText = ''
        self.__languages = 'en'
        self.__tickers = Ticker()
        self.__trackList = self.__tickers.getTickers()
        # MongoDB collection handle; created on first use by _get_collection().
        self.__collection = None
        self.setup_logger('msllog', r'logs/msl.log')
        self.msllog = logging.getLogger('msllog')

    def _get_collection(self):
        """Return the target collection, creating the MongoClient only once.

        Opening a new client for every tweet leaks connections, so a single
        collection handle is cached on the instance instead.
        """
        if self.__collection is None:
            client = MongoClient('localhost', 27017)
            self.__collection = client['twitter_db']['twitter_collection']
        return self.__collection

    def on_data(self, data):
        """Parse one raw stream payload and store it when it matches.

        data -- JSON text of a single stream message.

        Returns None in every case. Any exception is logged as a warning on
        'msllog' and swallowed so one bad payload does not end processing.
        """
        try:
            self.__tweet = json.loads(data)
            tweet = self.__tweet

            # Payloads without a 'text' entry cannot be stored; skip them
            # quietly instead of logging a KeyError for each one.
            if not isinstance(tweet, dict) or 'text' not in tweet:
                return

            # Drop non-ASCII characters but keep the value a text string so
            # the word keys are valid on both Python 2 and Python 3.
            self.__tweetText = (
                tweet['text'].encode('ascii', 'ignore').decode('ascii').strip())

            if tweet.get('lang') != self.__languages:
                return

            # extract data from tweet
            insertion = dict(
                (field, tweet[field])
                for field in self._FIELDS_TO_EXTRACT if field in tweet)

            # build word dictionary for record (once per tweet, not per field)
            insertion['rd'] = self.buildRecordDict()

            # insert tweet into mongodb
            # Single-argument print() behaves the same on Python 2 and 3.
            print(insertion)
            # NOTE(review): Collection.insert() is the legacy PyMongo call;
            # newer PyMongo releases offer insert_one() - confirm the
            # installed driver version before switching.
            self._get_collection().insert(insertion)

        except Exception as e:
            self.msllog.warning(e)

    def on_error(self, status):
        """Print the error status and pause 60 seconds on status 420.

        status -- status code reported by the stream.
        """
        print(status)
        if status == 420:
            time.sleep(60)

    def buildRecordDict(self):
        """Count the words of the current tweet text.

        Words are split on whitespace and lower-cased; every '.' and '$' is
        removed (presumably because of MongoDB field-name restrictions -
        confirm). Tokens that end up empty are not counted.

        Returns a dict mapping word -> occurrence count, or None when
        counting fails (the failure is logged on 'msllog').
        """
        try:
            recordDict = {}
            for word in self.__tweetText.split():
                wrd = word.lower().replace('.', '').replace('$', '')
                if not wrd:
                    # Token consisted only of '.'/'$' characters.
                    continue
                recordDict[wrd] = recordDict.get(wrd, 0) + 1

            return recordDict

        except Exception as e:
            # The stored tweet may not be a dict (it starts out as ''), so
            # look the id up defensively rather than raising from here.
            tweet = self.__tweet
            tweet_id = tweet.get('id') if isinstance(tweet, dict) else None
            msg = '{} {}'.format(tweet_id, e)
            self.msllog.warning(msg)
            return None

    def setup_logger(self, logger_name, log_file, level=logging.INFO):
        """Attach a file handler and a stream handler to a named logger.

        logger_name -- name passed to logging.getLogger().
        log_file    -- path of the log file; opened with mode 'w', so an
                       existing file is truncated.
        level       -- level set on the logger (default logging.INFO).

        When the logger already has handlers only the level is updated;
        this keeps repeated calls from duplicating every message and from
        truncating the log file again.
        """
        logger = logging.getLogger(logger_name)
        logger.setLevel(level)
        if logger.handlers:
            return

        formatter = logging.Formatter('%(asctime)s : %(message)s')
        fileHandler = logging.FileHandler(log_file, mode='w')
        fileHandler.setFormatter(formatter)
        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)

        logger.addHandler(fileHandler)
        logger.addHandler(streamHandler)
Beispiel #4
0
__author__ = 'kahlil'

import tweepy
from tweepy import Stream
from classes.oauth import OAuth
from classes.myStreamListener import MyStreamListener
from classes.ticker import Ticker
import logging
logging.basicConfig(filename='logs/tc.log',
                    level=logging.DEBUG,
                    format='%(asctime)s %(message)s')

try:
    t = Ticker()
    oa = OAuth()
    auth = tweepy.OAuthHandler(oa.getConsumerKey(), oa.getConsumerSecret())
    auth.set_access_token(oa.getAccessToken(), oa.getAccessTokenSecret())

    api = tweepy.API(auth)
    l = MyStreamListener()
    stream = Stream(auth, l)

    for chunk in t.chunkTickers():
        stream.filter(track=chunk, async=True)
except Exception as e:
    logging.warning(e)
Beispiel #5
0
 def __init__(self, db):
     """Store the database handle, load tickers and set up the 'dplog' logger.

     db -- kept unchanged on the instance; its type is not visible here.
     """
     self.__db = db

     # Result of Ticker().getTickers(); the Ticker object itself is not kept.
     ticker_values = Ticker().getTickers()
     self.__tickers = ticker_values

     # Starts out as an empty dict.
     self.__words = dict()

     # Configure the named logger first, then keep a handle to it.
     log_name = 'dplog'
     self.setup_logger(log_name, r'logs/dp.log')
     self.dplog = logging.getLogger(log_name)
class MyStreamListener(StreamListener):
    """Stream listener that stores selected tweet fields in MongoDB.

    Every payload handed to ``on_data`` is parsed as JSON. When its ``lang``
    equals the configured language ('en'), the keys named in
    ``_FIELDS_TO_EXTRACT`` plus a per-word count dictionary (``rd``) are
    inserted into ``twitter_db.twitter_collection`` on localhost:27017.
    Problems are logged through the 'msllog' logger rather than raised.
    """

    # Tweet keys copied as-is into the stored document.
    _FIELDS_TO_EXTRACT = (
        'id', 'timestamp_ms', 'text', 'favorited', 'favorite_count',
        'retweeted', 'retweet_count', 'coordinates', 'geo',
    )

    def __init__(self):
        """Initialise tweet state, ticker data and the 'msllog' logger."""
        super(MyStreamListener, self).__init__()
        self.__tweet = ''
        self.__tweetText = ''
        self.__languages = 'en'
        self.__tickers = Ticker()
        self.__trackList = self.__tickers.getTickers()
        # MongoDB collection handle; created on first use by _get_collection().
        self.__collection = None
        self.setup_logger('msllog', r'logs/msl.log')
        self.msllog = logging.getLogger('msllog')

    def _get_collection(self):
        """Return the target collection, creating the MongoClient only once.

        Opening a new client for every tweet leaks connections, so a single
        collection handle is cached on the instance instead.
        """
        if self.__collection is None:
            client = MongoClient('localhost', 27017)
            self.__collection = client['twitter_db']['twitter_collection']
        return self.__collection

    def on_data(self, data):
        """Parse one raw stream payload and store it when it matches.

        data -- JSON text of a single stream message.

        Returns None in every case. Any exception is logged as a warning on
        'msllog' and swallowed so one bad payload does not end processing.
        """
        try:
            self.__tweet = json.loads(data)
            tweet = self.__tweet

            # Payloads without a 'text' entry cannot be stored; skip them
            # quietly instead of logging a KeyError for each one.
            if not isinstance(tweet, dict) or 'text' not in tweet:
                return

            # Drop non-ASCII characters but keep the value a text string so
            # the word keys are valid on both Python 2 and Python 3.
            self.__tweetText = (
                tweet['text'].encode('ascii', 'ignore').decode('ascii').strip())

            if tweet.get('lang') != self.__languages:
                return

            # extract data from tweet
            insertion = dict(
                (field, tweet[field])
                for field in self._FIELDS_TO_EXTRACT if field in tweet)

            # build word dictionary for record (once per tweet, not per field)
            insertion['rd'] = self.buildRecordDict()

            # insert tweet into mongodb
            # Single-argument print() behaves the same on Python 2 and 3.
            print(insertion)
            # NOTE(review): Collection.insert() is the legacy PyMongo call;
            # newer PyMongo releases offer insert_one() - confirm the
            # installed driver version before switching.
            self._get_collection().insert(insertion)

        except Exception as e:
            self.msllog.warning(e)

    def on_error(self, status):
        """Print the error status and pause 60 seconds on status 420.

        status -- status code reported by the stream.
        """
        print(status)
        if status == 420:
            time.sleep(60)

    def buildRecordDict(self):
        """Count the words of the current tweet text.

        Words are split on whitespace and lower-cased; every '.' and '$' is
        removed (presumably because of MongoDB field-name restrictions -
        confirm). Tokens that end up empty are not counted.

        Returns a dict mapping word -> occurrence count, or None when
        counting fails (the failure is logged on 'msllog').
        """
        try:
            recordDict = {}
            for word in self.__tweetText.split():
                wrd = word.lower().replace('.', '').replace('$', '')
                if not wrd:
                    # Token consisted only of '.'/'$' characters.
                    continue
                recordDict[wrd] = recordDict.get(wrd, 0) + 1

            return recordDict

        except Exception as e:
            # The stored tweet may not be a dict (it starts out as ''), so
            # look the id up defensively rather than raising from here.
            tweet = self.__tweet
            tweet_id = tweet.get('id') if isinstance(tweet, dict) else None
            msg = '{} {}'.format(tweet_id, e)
            self.msllog.warning(msg)
            return None

    def setup_logger(self, logger_name, log_file, level=logging.INFO):
        """Attach a file handler and a stream handler to a named logger.

        logger_name -- name passed to logging.getLogger().
        log_file    -- path of the log file; opened with mode 'w', so an
                       existing file is truncated.
        level       -- level set on the logger (default logging.INFO).

        When the logger already has handlers only the level is updated;
        this keeps repeated calls from duplicating every message and from
        truncating the log file again.
        """
        logger = logging.getLogger(logger_name)
        logger.setLevel(level)
        if logger.handlers:
            return

        formatter = logging.Formatter('%(asctime)s : %(message)s')
        fileHandler = logging.FileHandler(log_file, mode='w')
        fileHandler.setFormatter(formatter)
        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)

        logger.addHandler(fileHandler)
        logger.addHandler(streamHandler)