class TweepyStreamBackoffTests(unittest.TestCase):
    """Checks the ``retry_time`` a ``Stream`` ends up with after failed connections."""

    def setUp(self):
        # bad auth causes twitter to return 401 errors
        self.auth = OAuthHandler("bad-key", "bad-secret")
        self.auth.set_access_token("bad-token", "bad-token-secret")
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener)

    def tearDown(self):
        # Each test swaps in its own stream; disconnect whichever one is current.
        self.stream.disconnect()

    def test_exp_backoff(self):
        options = {
            "timeout": 3.0,
            "retry_count": 1,
            "retry_time": 1.0,
            "retry_time_cap": 100.0,
        }
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()

        # 1 retry, should be 4x the retry_time
        self.assertEqual(self.stream.retry_time, 4.0)

    def test_exp_backoff_cap(self):
        options = {
            "timeout": 3.0,
            "retry_count": 1,
            "retry_time": 1.0,
            "retry_time_cap": 3.0,
        }
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()

        # 1 retry, but 4x the retry_time exceeds the cap, so should be capped
        self.assertEqual(self.stream.retry_time, 3.0)

    # Stand-in for the patched response getter: calling it yields an object
    # whose ``status`` attribute is 420.
    mock_resp = MagicMock()
    mock_resp.return_value.status = 420

    @patch(getresponse_location, mock_resp)
    def test_420(self):
        options = {
            "timeout": 3.0,
            "retry_count": 0,
            "retry_time": 1.0,
            "retry_420": 1.5,
            "retry_time_cap": 20.0,
        }
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()

        # no retries, but error 420, should be double the retry_420,
        # not double the retry_time
        self.assertEqual(self.stream.retry_time, 3.0)
class TweepyStreamBackoffTests(unittest.TestCase):
    """Checks the ``retry_time`` a ``Stream`` ends up with after failed connections."""

    def setUp(self):
        # bad auth causes twitter to return 401 errors
        self.auth = OAuthHandler("bad-key", "bad-secret")
        self.auth.set_access_token("bad-token", "bad-token-secret")
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener)

    def tearDown(self):
        # Each test swaps in its own stream; disconnect whichever one is current.
        self.stream.disconnect()

    def test_exp_backoff(self):
        options = {
            "timeout": 3.0,
            "retry_count": 1,
            "retry_time": 1.0,
            "retry_time_cap": 100.0,
        }
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()

        # 1 retry, should be 4x the retry_time
        self.assertEqual(self.stream.retry_time, 4.0)

    def test_exp_backoff_cap(self):
        options = {
            "timeout": 3.0,
            "retry_count": 1,
            "retry_time": 1.0,
            "retry_time_cap": 3.0,
        }
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()

        # 1 retry, but 4x the retry_time exceeds the cap, so should be capped
        self.assertEqual(self.stream.retry_time, 3.0)

    # Stand-in for ``requests.Session.request``: calling it yields an object
    # whose ``status_code`` attribute is 420.
    mock_resp = MagicMock()
    mock_resp.return_value.status_code = 420

    @patch('requests.Session.request', mock_resp)
    def test_420(self):
        options = {
            "timeout": 3.0,
            "retry_count": 0,
            "retry_time": 1.0,
            "retry_420": 1.5,
            "retry_time_cap": 20.0,
        }
        self.stream = Stream(self.auth, self.listener, **options)
        self.stream.sample()

        # no retries, but error 420, should be double the retry_420,
        # not double the retry_time
        self.assertEqual(self.stream.retry_time, 3.0)
class TweepyStreamTests(unittest.TestCase):
    """Streaming tests that count the statuses delivered to a mock listener."""

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)
def main():
    """
    Main run function

    Builds a tweepy API client from the CONSUMER_KEY, CONSUMER_KEY_SECRET,
    ACCESS_TOKEN and ACCESS_TOKEN_SECRET environment variables, collects the
    users to follow and the search terms to track, then streams in a loop.

    A ``ReadTimeoutError`` is logged and the loop opens a fresh stream;
    ``KeyboardInterrupt`` ends the loop. The stream is disconnected after
    every iteration, whatever the outcome.
    """
    set_logging()
    logger = logging.getLogger(f"{__name__}.main")

    auth = tweepy.OAuthHandler(os.getenv("CONSUMER_KEY"),
                               os.getenv("CONSUMER_KEY_SECRET"))
    auth.set_access_token(os.getenv("ACCESS_TOKEN"),
                          os.getenv("ACCESS_TOKEN_SECRET"))
    api = tweepy.API(auth, wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    following = get_following_users(api)
    following.extend(get_default_users(api))
    tracks = get_following_searches()
    logger.info("Following %d users and %d searches",
                len(following), len(tracks))

    while True:
        listener = Listener()
        stream = Stream(auth=api.auth, listener=listener)
        try:
            logger.info("Started streaming")
            stream.filter(follow=following, track=tracks)
        except KeyboardInterrupt:
            logger.info("Stopped")
            break
        except ReadTimeoutError as exc:
            # The message needs a %s placeholder for its argument; without
            # one, logging fails to format the record and the text is lost.
            logger.error("Handled exception: %s", exc, exc_info=True)
        finally:
            logger.info("Done")
            stream.disconnect()
class TweepyStreamTests(unittest.TestCase):
    """Streaming tests: status delivery and encoding of filter parameters."""

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_userstream_with_params(self):
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream(_with='user', replies='all',
                               stall_warnings=True)
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_track_encoding(self):
        s = Stream(None, None)
        # Stub out the connection step. The parameter is not named ``async``
        # because that is a reserved word from Python 3.7 onwards.
        s._start = lambda is_async: None
        s.filter(track=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.session.params['track'])

    def test_follow_encoding(self):
        s = Stream(None, None)
        # Stub out the connection step (see note on the parameter name above
        # in test_track_encoding: ``async`` is reserved from Python 3.7).
        s._start = lambda is_async: None
        s.filter(follow=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.session.params['follow'])
class TweepyStreamTests(unittest.TestCase):
    """Streaming tests: status delivery and encoding of filter parameters."""

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_userstream_with_params(self):
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream(_with='user', replies='all',
                               stall_warnings=True)
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_track_encoding(self):
        s = Stream(None, None)
        # Stub out the connection step. The parameter is not named ``async``
        # because that is a reserved word from Python 3.7 onwards.
        s._start = lambda is_async: None
        s.filter(track=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.parameters['track'])

    def test_follow_encoding(self):
        s = Stream(None, None)
        # Stub out the connection step (``async`` is reserved from
        # Python 3.7, hence the parameter name).
        s._start = lambda is_async: None
        s.filter(follow=[u'Caf\xe9'])

        # Should be UTF-8 encoded
        self.assertEqual(u'Caf\xe9'.encode('utf8'), s.parameters['follow'])
def __listen(self):
    """Run a tracked-keyword stream until it ends, then record the outage.

    Builds a ``TweetStreamListener`` from ``self.api``, ``self.sentiment``
    and ``self.error``, filters the stream on ``self.track`` and, once the
    stream is over, saves a ``'stream down'`` item through ``self.error``.
    The stream is always disconnected, even if saving that item fails.
    """
    listener = TweetStreamListener(self.api, self.sentiment, self.error)
    stream = Stream(self.auth, listener)

    # Single-argument call form prints identically on Python 2 and 3.
    print('Starting stream...')
    try:
        stream.filter(track=self.track)
    except BaseException as exc:
        # Deliberately broad: this method has always swallowed every failure
        # (including Ctrl-C) and moved on to the shutdown steps. The cause is
        # now reported instead of being silently discarded.
        print('Encountered error!')
        print(repr(exc))

    try:
        print('Exiting application')
        item = {
            'status': 'stream down',
            'timestamp': datetime.utcnow()
        }
        self.error.save(item)
    finally:
        # Disconnect even when recording the outage raises.
        stream.disconnect()
class TweepyStreamTests(unittest.TestCase):
    """Streaming tests: status delivery and listener handling of bad data."""

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def test_userstream(self):
        # Generate random tweet which should show up in the stream.
        def on_connect():
            API(self.auth).update_status(mock_tweet())

        self.listener.connect_cb = on_connect
        self.listener.status_stop_count = 1
        self.stream.userstream()
        self.assertEqual(self.listener.status_count, 1)

    def test_sample(self):
        self.listener.status_stop_count = 10
        self.stream.sample()
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_filter_track(self):
        self.listener.status_stop_count = 5
        phrases = ['twitter']
        self.stream.filter(track=phrases)
        self.assertEqual(self.listener.status_count,
                         self.listener.status_stop_count)

    def test_on_data(self):
        test_wrong_data = [
            '{"disc',  # this is actual data read from twitter
            '600',  # this is actual data read from twitter
            '41\n',  # this is actual data read from twitter
            'obviously non-json',
            '"json but not dict"',
            '{"json dict":"but not a twitter message"}'
        ]
        for raw_data in test_wrong_data:
            # should log errors but not raise / not return False
            self.assertIsNone(self.listener.on_data(raw_data))
            # None of the malformed payloads may be counted as a status.
            self.assertEqual(self.listener.status_count, 0)
class TweepyStreamTests(unittest.TestCase):
    """Streaming tests: status delivery and encoding of filter parameters."""

    def setUp(self):
        self.auth = create_auth()
        self.listener = MockStreamListener(self)
        self.stream = Stream(self.auth, self.listener, timeout=3.0)

    def tearDown(self):
        self.stream.disconnect()

    def on_connect(self):
        # Post a mock tweet through a fresh API client.
        client = API(self.auth)
        client.update_status(mock_tweet())

    def test_sample(self):
        self.listener.status_stop_count = 10
        self.stream.sample()

        listener = self.listener
        self.assertEqual(listener.status_count, listener.status_stop_count)

    def test_filter_track(self):
        self.listener.status_stop_count = 5
        self.stream.filter(track=['twitter'])

        listener = self.listener
        self.assertEqual(listener.status_count, listener.status_stop_count)

    def test_track_encoding(self):
        def skip_connect(is_async):
            # Replaces Stream._start so no connection is attempted.
            return None

        term = 'Caf\xe9'
        stream = Stream(None, None)
        stream._start = skip_connect
        stream.filter(track=[term])

        # Should be UTF-8 encoded
        self.assertEqual(term.encode('utf8'), stream.body['track'])

    def test_follow_encoding(self):
        def skip_connect(is_async):
            # Replaces Stream._start so no connection is attempted.
            return None

        term = 'Caf\xe9'
        stream = Stream(None, None)
        stream._start = skip_connect
        stream.filter(follow=[term])

        # Should be UTF-8 encoded
        self.assertEqual(term.encode('utf8'), stream.body['follow'])
class TweepyLib: CONFIG_FILENAME = "twitterConfig.ini" # name of the config file defining e.g. the Twitter API key CONFIG_SECTION_KEYS = "keys" # name of the section of the config file holding the various Twitter API keys/tokens/etc. def __init__(self): config = RawConfigParser() configFilePath = os.path.join(os.path.dirname(__file__), self.__class__.CONFIG_FILENAME) logger.info("HELLO WORLD") logger.info(configFilePath) config.read(configFilePath) # Read API keys/tokens from config file self.consumer_key = config.get(self.__class__.CONFIG_SECTION_KEYS, "consumer_key") self.consumer_secret = config.get(self.__class__.CONFIG_SECTION_KEYS, "consumer_secret") self.access_token = config.get(self.__class__.CONFIG_SECTION_KEYS, "access_token") self.access_token_secret = config.get(self.__class__.CONFIG_SECTION_KEYS, "access_token_secret") self.auth = OAuthHandler(self.consumer_key, self.consumer_secret) self.isStreaming=False self.connect() self.tweetsQueue = Queue.Queue() self.filter=[] def connect(self): #todo: recheck needed auth self.auth.set_access_token(self.access_token, self.access_token_secret) logger.info("Connected with Twitter") def doStreaming(self): class StdOutListener(StreamListener): def __init__(self, twitterObject): super(StdOutListener, self).__init__() self.twitterObject = twitterObject def on_status(self, status): logger.info("adds new tweeter to queue with id %s: %s" % (status.id, status.text)) self.twitterObject.tweetsQueue.put(status) return True def on_error(self, status): logger.warning("Error %s while reading twitter" % status) listener = StdOutListener(self) self.stream = Stream(self.auth, listener) self.stream.filter(track=self.filter, async=True) def startStreamer(self): if(not self.filter): logger.warning("Trying to start stream, but the filter is empty") return self.doStreaming() logger.info("Streamer was started") self.isStreaming = True def stopStreamer(self): if(self.isStreaming): self.isStreaming = False self.stream.disconnect() 
logger.info("Streamer was stopped") def restartStreamer(self): self.stopStreamer() self.startStreamer() def addHashtagToFilter(self, newHashtag): if(newHashtag[:1] != '#'): newHashtag = '#' + newHashtag if(newHashtag in self.filter): return self.filter.append(newHashtag.encode()) logger.info("%s added to the filter" % newHashtag) self.restartStreamer() def removeHashtagFromFilter(self, existingHashtag): if(existingHashtag[:1] != '#'): existingHashtag = '#' + existingHashtag if(existingHashtag in self.filter): return self.filter.remove(existingHashtag) logger.info("%s removed from the filter" % existingHashtag) self.restartStreamer() def removeAllHashtagsFromFilter(self): changes = 0 for i in self.filter: # we only want to rmeove hashtags, not other filters if(i[:1] == '#'): changes += 1 self.filter.remove(i) logger.info("All hashtags removed from the filter") if(changes != 0): self.restartStreamer()
class TwitterCrawlerAgent(spade.Agent.Agent): def log(self, msg): print '[TWITTER_CRAWLER] ' + str(msg) def init_stream(self): self.q = Queue() self.keywords = [] self.listener = TwitterStreamListener(self.keywords, self.q) auth = OAuthHandler(config.con_secret, config.con_secret_key) auth.set_access_token(config.token, config.token_key) self.stream = Stream(auth, self.listener) def restart_stream(self): self.log("Restarting stream") self.stream.disconnect() self.stream.filter(track=self.keywords, async=True, languages=["en"]) def update_stream(self, keywords): self.log("Updating stream with keywords: ") self.log(keywords) if len(keywords) != 0: self.keywords = keywords self.listener.set_keywords(self.keywords) self.restart_stream() else: self.log("Empty keyword list!") def prepare_and_send_message(self): self.log("Preparing message") receiver = spade.AID.aid(name=config.sentimenter, addresses=["xmpp://" + config.sentimenter]) self.msg = spade.ACLMessage.ACLMessage() self.msg.setPerformative("inform") self.msg.setOntology(config.raw_tweet) self.msg.addReceiver(receiver) content = self.get_tweets_from_queue() str_content = "" for tweet in content: str_content += tweet.serialize() + "|" str_content = str_content[:-1] self.msg.setContent(str_content) self.send(self.msg) self.log("Message sent") def get_tweets_from_queue(self): self.log("Preparing tweet package content") tweets_list = [] while len(tweets_list) < config.batch_size and not self.q.empty(): item = self.q.get() if item is None: break tweets_list.append(item) return tweets_list def _setup(self): # self.setDebugToScreen() self.init_stream() self.addBehaviour(ListenForTweetsBehav(10), None) template = spade.Behaviour.ACLTemplate() template.setOntology(config.keyword_msg) t = spade.Behaviour.MessageTemplate(template) self.addBehaviour(UpdateKeywordsBehav(), t)