Esempio n. 1
0
class TweepyStreamBackoffTests(unittest.TestCase):
    """Check the value of ``Stream.retry_time`` after failed connections."""

    def setUp(self):
        # Deliberately invalid credentials: bad auth causes twitter to
        # return 401 errors.
        bad_auth = OAuthHandler("bad-key", "bad-secret")
        bad_auth.set_access_token("bad-token", "bad-token-secret")
        self.auth = bad_auth
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener)

    def tearDown(self):
        self.stream.disconnect()

    def test_exp_backoff(self):
        """One retry under a generous cap: retry_time ends at 4x its start."""
        options = dict(timeout=3.0, retry_count=1,
                       retry_time=1.0, retry_time_cap=100.0)
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()
        self.assertEqual(self.stream.retry_time, 4.0)

    def test_exp_backoff_cap(self):
        """One retry where 4x retry_time exceeds the cap: value is capped."""
        options = dict(timeout=3.0, retry_count=1,
                       retry_time=1.0, retry_time_cap=3.0)
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()
        self.assertEqual(self.stream.retry_time, 3.0)

    # Stand-in for the patched response getter; its result reports status 420.
    mock_resp = MagicMock()
    mock_resp.return_value.status = 420

    @patch(getresponse_location, mock_resp)
    def test_420(self):
        """No retries, but a 420 error: expect double retry_420, not retry_time."""
        options = dict(timeout=3.0, retry_count=0, retry_time=1.0,
                       retry_420=1.5, retry_time_cap=20.0)
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()
        self.assertEqual(self.stream.retry_time, 3.0)
Esempio n. 2
0
class TweepyStreamBackoffTests(unittest.TestCase):
    """Check the value of ``Stream.retry_time`` after failed connections."""

    def setUp(self):
        # Deliberately invalid credentials: bad auth causes twitter to
        # return 401 errors.
        bad_auth = OAuthHandler("bad-key", "bad-secret")
        bad_auth.set_access_token("bad-token", "bad-token-secret")
        self.auth = bad_auth
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener)

    def tearDown(self):
        self.stream.disconnect()

    def test_exp_backoff(self):
        """One retry under a generous cap: retry_time ends at 4x its start."""
        options = dict(timeout=3.0, retry_count=1,
                       retry_time=1.0, retry_time_cap=100.0)
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()
        self.assertEqual(self.stream.retry_time, 4.0)

    def test_exp_backoff_cap(self):
        """One retry where 4x retry_time exceeds the cap: value is capped."""
        options = dict(timeout=3.0, retry_count=1,
                       retry_time=1.0, retry_time_cap=3.0)
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()
        self.assertEqual(self.stream.retry_time, 3.0)

    # Stand-in for ``requests.Session.request``; its result reports
    # status_code 420.
    mock_resp = MagicMock()
    mock_resp.return_value.status_code = 420

    @patch('requests.Session.request', mock_resp)
    def test_420(self):
        """No retries, but a 420 error: expect double retry_420, not retry_time."""
        options = dict(timeout=3.0, retry_count=0, retry_time=1.0,
                       retry_420=1.5, retry_time_cap=20.0)
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()
        self.assertEqual(self.stream.retry_time, 3.0)
Esempio n. 3
0
class TweepyStreamTests(unittest.TestCase):
    """Exercise the userstream, sample and filter endpoints of ``Stream``.

    Each test tells the mock listener how many statuses to wait for
    (``status_stop_count``) and then checks ``status_count``.
    """

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        """A status posted on connect is counted once by the listener."""
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        """The sample stream delivers as many statuses as requested."""
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        """Filtering on a tracked phrase delivers the requested count."""
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)
Esempio n. 4
0
def main():
    """Authenticate with Twitter and stream followed users and searches.

    Credentials are read from the ``CONSUMER_KEY``, ``CONSUMER_KEY_SECRET``,
    ``ACCESS_TOKEN`` and ``ACCESS_TOKEN_SECRET`` environment variables.
    A new listener and stream are created on every loop iteration, so the
    stream is restarted whenever ``filter`` returns or a
    ``ReadTimeoutError`` is handled. ``KeyboardInterrupt`` ends the loop.
    """

    set_logging()
    logger = logging.getLogger(f"{__name__}.main")

    auth = tweepy.OAuthHandler(os.getenv("CONSUMER_KEY"),
                               os.getenv("CONSUMER_KEY_SECRET"))
    auth.set_access_token(os.getenv("ACCESS_TOKEN"),
                          os.getenv("ACCESS_TOKEN_SECRET"))
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    following = get_following_users(api)
    following.extend(get_default_users(api))
    tracks = get_following_searches()

    logger.info(f"Following {len(following)} users and {len(tracks)} searches")
    while True:
        listener = Listener()
        stream = Stream(auth=api.auth, listener=listener)
        try:
            logger.info("Started streaming")
            stream.filter(follow=following, track=tracks)
        except KeyboardInterrupt:
            logger.info("Stopped")
            break
        except ReadTimeoutError as exc:
            # The message needs a %s placeholder for the argument; without
            # one, logging fails to format the record and the message is
            # lost.
            logger.error("Handled exception: %s", exc, exc_info=True)
        finally:
            # Runs on every exit path, including the break above.
            logger.info("Done")
            stream.disconnect()
Esempio n. 5
0
class TweepyStreamTests(unittest.TestCase):
    """Exercise the userstream, sample and filter endpoints of ``Stream``.

    Each test tells the mock listener how many statuses to wait for
    (``status_stop_count``) and then checks ``status_count``.
    """

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        """A status posted on connect is counted once by the listener."""
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        """The sample stream delivers as many statuses as requested."""
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        """Filtering on a tracked phrase delivers the requested count."""
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)
Esempio n. 6
0
class TweepyStreamTests(unittest.TestCase):
    """Exercise ``Stream`` endpoints and the encoding of filter parameters.

    The endpoint tests tell the mock listener how many statuses to wait
    for (``status_stop_count``) and then check ``status_count``. The
    encoding tests replace ``_start`` with a no-op and inspect
    ``session.params``.
    """

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        """A status posted on connect is counted once by the listener."""
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_userstream_with_params(self):
        """Same as test_userstream, with extra userstream parameters."""
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream(_with='user',
                               replies='all',
                               stall_warnings=True)
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        """The sample stream delivers as many statuses as requested."""
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        """Filtering on a tracked phrase delivers the requested count."""
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_track_encoding(self):
        """A non-ASCII ``track`` term ends up UTF-8 encoded in the params."""
        s = Stream(None, None)
        # Stub out the connection. The parameter is named ``is_async``
        # because ``async`` is a reserved word from Python 3.7 on; this
        # assumes the stub is called positionally -- TODO confirm.
        s._start = lambda is_async: None
        s.filter(track=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.session.params['track'])

    def test_follow_encoding(self):
        """A non-ASCII ``follow`` term ends up UTF-8 encoded in the params."""
        s = Stream(None, None)
        # See test_track_encoding for why the parameter is ``is_async``.
        s._start = lambda is_async: None
        s.filter(follow=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.session.params['follow'])
Esempio n. 7
0
class TweepyStreamTests(unittest.TestCase):
    """Exercise ``Stream`` endpoints and the encoding of filter parameters.

    The endpoint tests tell the mock listener how many statuses to wait
    for (``status_stop_count``) and then check ``status_count``. The
    encoding tests replace ``_start`` with a no-op and inspect
    ``parameters``.
    """

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        """A status posted on connect is counted once by the listener."""
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_userstream_with_params(self):
        """Same as test_userstream, with extra userstream parameters."""
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream(_with='user', replies='all', stall_warnings=True)
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        """The sample stream delivers as many statuses as requested."""
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        """Filtering on a tracked phrase delivers the requested count."""
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_track_encoding(self):
        """A non-ASCII ``track`` term ends up UTF-8 encoded in the parameters."""
        s = Stream(None, None)
        # Stub out the connection. The parameter is named ``is_async``
        # because ``async`` is a reserved word from Python 3.7 on; this
        # assumes the stub is called positionally -- TODO confirm.
        s._start = lambda is_async: None
        s.filter(track=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.parameters['track'])

    def test_follow_encoding(self):
        """A non-ASCII ``follow`` term ends up UTF-8 encoded in the parameters."""
        s = Stream(None, None)
        # See test_track_encoding for why the parameter is ``is_async``.
        s._start = lambda is_async: None
        s.filter(follow=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.parameters['follow'])
Esempio n. 8
0
    def __listen(self):
        """Stream statuses matching ``self.track`` until the stream fails.

        On any exception a 'stream down' record with the current UTC time
        is saved through ``self.error`` and the stream is disconnected.
        """
        tweet_listener = TweetStreamListener(self.api, self.sentiment, self.error)
        stream = Stream(self.auth, tweet_listener)

        print('Starting stream...')
        try:
            stream.filter(track=self.track)
        except:
            # NOTE(review): the bare except also traps KeyboardInterrupt and
            # SystemExit; kept as-is so interruption is recorded as well.
            print('Encountered error!')
            print('Exiting application')
            down_record = {'status': 'stream down',
                           'timestamp': datetime.utcnow()}
            self.error.save(down_record)
            stream.disconnect()
Esempio n. 9
0
class TweepyStreamTests(unittest.TestCase):
    """Exercise ``Stream`` endpoints and the listener's handling of bad data.

    The endpoint tests tell the mock listener how many statuses to wait
    for (``status_stop_count``) and then check ``status_count``.
    """

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        """A status posted on connect is counted once by the listener."""
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        """The sample stream delivers as many statuses as requested."""
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        """Filtering on a tracked phrase delivers the requested count."""
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_on_data(self):
        """Malformed payloads make ``on_data`` return None and count nothing."""
        test_wrong_data = [
            '{"disc',  # this is actual data read from twitter
            '600',     # this is actual data read from twitter
            '41\n',    # this is actual data read from twitter
            'obviously non-json',
            '"json but not dict"',
            '{"json dict":"but not a twitter message"}',
        ]
        for raw_data in test_wrong_data:
            # should log errors but not raise / not return False
            self.assertIsNone(self.listener.on_data(raw_data))
            self.assertEqual(self.listener.status_count, 0)
Esempio n. 10
0
class TweepyStreamTests(unittest.TestCase):
    """Exercise ``Stream`` endpoints and the encoding of filter parameters.

    The endpoint tests tell the mock listener how many statuses to wait
    for (``status_stop_count``) and then check ``status_count``. The
    encoding tests replace ``_start`` with a no-op and inspect ``body``.
    """

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def on_connect(self):
        """Post a mock tweet through a fresh ``API`` built on ``self.auth``."""
        client = API(self.auth)
        client.update_status(mock_tweet())

    def test_sample(self):
        """The sample stream delivers as many statuses as requested."""
        listener = self.listener
        listener.status_stop_count = 10
        self.stream.sample()
        self.assertEqual(listener.status_count, listener.status_stop_count)

    def test_filter_track(self):
        """Filtering on a tracked phrase delivers the requested count."""
        listener = self.listener
        listener.status_stop_count = 5
        self.stream.filter(track=['twitter'])
        self.assertEqual(listener.status_count, listener.status_stop_count)

    def test_track_encoding(self):
        """A non-ASCII ``track`` term ends up UTF-8 encoded in the body."""
        stub = Stream(None, None)
        stub._start = lambda is_async: None  # never open a connection
        stub.filter(track=['Caf\xe9'])

        # Should be UTF-8 encoded
        expected = 'Caf\xe9'.encode('utf8')
        self.assertEqual(expected, stub.body['track'])

    def test_follow_encoding(self):
        """A non-ASCII ``follow`` term ends up UTF-8 encoded in the body."""
        stub = Stream(None, None)
        stub._start = lambda is_async: None  # never open a connection
        stub.filter(follow=['Caf\xe9'])

        # Should be UTF-8 encoded
        expected = 'Caf\xe9'.encode('utf8')
        self.assertEqual(expected, stub.body['follow'])
Esempio n. 11
0
class TweepyLib:
    
    CONFIG_FILENAME = "twitterConfig.ini" # name of the config file defining e.g. the Twitter API key
    CONFIG_SECTION_KEYS = "keys" # name of the section of the config file holding the various Twitter API keys/tokens/etc.

    def __init__(self):
        config = RawConfigParser()
        configFilePath = os.path.join(os.path.dirname(__file__), self.__class__.CONFIG_FILENAME)
        logger.info("HELLO WORLD")
        logger.info(configFilePath)
        config.read(configFilePath)
        
        # Read API keys/tokens from config file
        self.consumer_key = config.get(self.__class__.CONFIG_SECTION_KEYS, "consumer_key")
        self.consumer_secret = config.get(self.__class__.CONFIG_SECTION_KEYS, "consumer_secret")
        self.access_token = config.get(self.__class__.CONFIG_SECTION_KEYS, "access_token")
        self.access_token_secret = config.get(self.__class__.CONFIG_SECTION_KEYS, "access_token_secret")
        self.auth = OAuthHandler(self.consumer_key, self.consumer_secret)
        
        self.isStreaming=False
        self.connect()
        self.tweetsQueue = Queue.Queue()
        self.filter=[]
    
    def connect(self):
        #todo: recheck needed auth
        self.auth.set_access_token(self.access_token, self.access_token_secret)
        logger.info("Connected with Twitter")
  
    def doStreaming(self):
        class StdOutListener(StreamListener):
            def __init__(self, twitterObject):
                super(StdOutListener, self).__init__()
                self.twitterObject = twitterObject
           
            def on_status(self, status):
                logger.info("adds new tweeter to queue with id %s: %s" % (status.id, status.text))
                self.twitterObject.tweetsQueue.put(status)
                return True
                 
            def on_error(self, status):
                logger.warning("Error %s while reading twitter" % status)
  
        listener = StdOutListener(self) 
        self.stream = Stream(self.auth, listener)
        self.stream.filter(track=self.filter, async=True)
  
    def startStreamer(self):
        if(not self.filter):
            logger.warning("Trying to start stream, but the filter is empty")
            return
        self.doStreaming()
        logger.info("Streamer was started")
        self.isStreaming = True
    
    def stopStreamer(self):
        if(self.isStreaming):      
            self.isStreaming = False      
            self.stream.disconnect()
            logger.info("Streamer was stopped")
      
    def restartStreamer(self):
        self.stopStreamer()
        self.startStreamer()
      
    def addHashtagToFilter(self, newHashtag):
        if(newHashtag[:1] != '#'):
            newHashtag = '#' + newHashtag
        if(newHashtag in self.filter):
            return    
        self.filter.append(newHashtag.encode())
        logger.info("%s added to the filter" % newHashtag)
        self.restartStreamer()
        
    def removeHashtagFromFilter(self, existingHashtag):
        if(existingHashtag[:1] != '#'):
            existingHashtag = '#' + existingHashtag
        if(existingHashtag in self.filter):
            return    
        self.filter.remove(existingHashtag)
        logger.info("%s removed from the filter" % existingHashtag)
        self.restartStreamer()
        
    def removeAllHashtagsFromFilter(self):
        changes = 0
        for i in self.filter:
            # we only want to rmeove hashtags, not other filters
            if(i[:1] == '#'):
                changes += 1
                self.filter.remove(i)
        logger.info("All hashtags removed from the filter")
        if(changes != 0):
            self.restartStreamer()
Esempio n. 12
0
class TwitterCrawlerAgent(spade.Agent.Agent):
    def log(self, msg):
        print '[TWITTER_CRAWLER] ' + str(msg)

    def init_stream(self):
        self.q = Queue()
        self.keywords = []
        self.listener = TwitterStreamListener(self.keywords, self.q)
        auth = OAuthHandler(config.con_secret, config.con_secret_key)
        auth.set_access_token(config.token, config.token_key)
        self.stream = Stream(auth, self.listener)

    def restart_stream(self):
        self.log("Restarting stream")
        self.stream.disconnect()
        self.stream.filter(track=self.keywords, async=True, languages=["en"])

    def update_stream(self, keywords):
        self.log("Updating stream with keywords: ")
        self.log(keywords)
        if len(keywords) != 0:
            self.keywords = keywords
            self.listener.set_keywords(self.keywords)
            self.restart_stream()
        else:
            self.log("Empty keyword list!")

    def prepare_and_send_message(self):
        self.log("Preparing message")
        receiver = spade.AID.aid(name=config.sentimenter,
                                 addresses=["xmpp://" + config.sentimenter])
        self.msg = spade.ACLMessage.ACLMessage()
        self.msg.setPerformative("inform")
        self.msg.setOntology(config.raw_tweet)
        self.msg.addReceiver(receiver)
        content = self.get_tweets_from_queue()
        str_content = ""
        for tweet in content:
            str_content += tweet.serialize() + "|"
        str_content = str_content[:-1]
        self.msg.setContent(str_content)
        self.send(self.msg)
        self.log("Message sent")

    def get_tweets_from_queue(self):
        self.log("Preparing tweet package content")
        tweets_list = []
        while len(tweets_list) < config.batch_size and not self.q.empty():
            item = self.q.get()
            if item is None:
                break
            tweets_list.append(item)
        return tweets_list

    def _setup(self):
        # self.setDebugToScreen()
        self.init_stream()
        self.addBehaviour(ListenForTweetsBehav(10), None)

        template = spade.Behaviour.ACLTemplate()
        template.setOntology(config.keyword_msg)
        t = spade.Behaviour.MessageTemplate(template)
        self.addBehaviour(UpdateKeywordsBehav(), t)