def safe_listen(self):
    """Run one ``listen_stream`` session, then back off before the next one.

    Reconnect errors raised by ``tweetstream`` are logged and forwarded to
    ``update_wait``: no arguments for ``ReconnectImmediatelyError``,
    ``amount=1`` for ``ReconnectLinearlyError`` and ``amount=1, factor=2``
    for ``ReconnectExponentiallyError``.

    After a normal return or a reconnect error the method resets the wait
    (``update_wait(reset=True)``) when the previous wait is more than 15
    minutes old, stamps ``self.last_wait`` and sleeps ``self.wait_secs``
    seconds.

    Returns:
        The exception instance when ``listen_stream`` raised any other
        ``Exception`` (no sleep happens in that case), otherwise ``None``.

    Exceptions that are not ``Exception`` subclasses (``KeyboardInterrupt``,
    ``SystemExit``) propagate immediately, without sleeping first.
    """
    try:
        self.listen_stream()
    except tweetstream.ReconnectImmediatelyError as rie:
        logger.error('got %s in stream: %s', type(rie), rie)
        self.update_wait()
    except tweetstream.ReconnectLinearlyError as rie:
        logger.error('got %s in stream: %s', type(rie), rie)
        self.update_wait(amount=1)
    except tweetstream.ReconnectExponentiallyError as rie:
        logger.error('got %s in stream: %s', type(rie), rie)
        self.update_wait(amount=1, factor=2)
    except Exception as e:
        logger.error('got abnormal exception in stream: %s', e, exc_info=True)
        # Hand the failure to the caller right away; it decides whether to
        # resume or leave the loop, so no back-off sleep here.
        return e

    # Deliberately NOT in a ``finally`` clause: the back-off must not run
    # (and block in ``sleep``) while an interrupt is propagating.
    now = time.time()
    if self.last_wait and now - self.last_wait > 15 * 60:
        logger.info('resetting time wait (last wait: %s)', self.last_wait)
        self.update_wait(reset=True)
    self.last_wait = now
    logger.info("will sleep %s secs", self.wait_secs)
    time.sleep(self.wait_secs)
    return None
def log_request(self, handler):
    """Write one INFO line describing a served HTTP request.

    The line contains ``handler._request_summary()``, the request headers,
    ``handler.get_status()`` and the request duration converted from the
    value of ``handler.request.request_time()`` to milliseconds.
    """
    elapsed_ms = handler.request.request_time() * 1000.0
    summary = handler._request_summary()
    headers = handler.request.headers
    status = handler.get_status()
    logger.info('HTTP %s %s %s (%.2f ms)', summary, headers, status, elapsed_ms)
def __call__(self):
    """Main loop of the stream thread.

    Calls ``safe_listen`` repeatedly while the thread is alive and
    ``should_stop`` is false.  ``safe_listen`` returns a falsy value after
    a normal round, or the exception it caught:

    * a ``StopIteration`` (or subclass) instance ends the loop;
    * any other exception is logged and the loop resumes.
    """
    logger.info('stream thread main loop')
    while self.is_alive() and not self.should_stop:
        status = self.safe_listen()
        if not status:
            continue
        # isinstance() rather than an exact type comparison, so subclasses
        # of StopIteration are honoured as stop messages too.
        if isinstance(status, StopIteration):
            logger.warn('got stop loop message, exiting stream listener')
            return
        logger.warn('got exception, will resume loop')
def fetch(self, query):
    """Yield the ``_fetch`` result for *query*.

    When the store knows a last tweet id for the query, the URL is built
    from ``update_query_url`` with that id; otherwise ``first_query_url``
    is used and ``_fetch`` is called with ``recurse=False``.

    NOTE(review): this is a generator function; presumably it is driven by
    a decorator or caller not visible here - confirm.
    """
    last_seen = self.store.retrieve_last_tweet_id(query)
    logger.info("query: %s last query tweet: %s", query, last_seen)

    if last_seen:
        params = {"query": urllib.quote(query), "last_tweet_id": last_seen}
        pending = self._fetch(query, self.update_query_url % params)
    else:
        params = {"query": urllib.quote(query)}
        pending = self._fetch(query, self.first_query_url % params,
                              recurse=False)
    yield pending
def _fetch(self, query, url, recurse=True):
    """Fetch *url* and pass the response to ``handle_response``.

    The response is obtained by yielding ``cyclone_fetch(url, ...)`` with
    ``self.headers``.  An ``EnhanceYourCalmException`` is logged as an
    HTTP 429 and the same call is scheduled again through
    ``reactor.callLater`` after 5 seconds.  Any other ``Exception`` is
    logged with its traceback and not re-raised.
    """
    logger.info("fetching: %s", url)
    retry_delay = 5
    try:
        response = yield cyclone_fetch(url, headers=self.headers)
        self.handle_response(query, url, response, recurse=recurse)
    except EnhanceYourCalmException:
        logger.error("HTTP 429 Too Many Requests ; will retry call later")
        reactor.callLater(retry_delay, self._fetch, query, url,
                          recurse=recurse)
    except Exception as err:
        logger.error("url fetch error: %s:%s", type(err), err, exc_info=True)
def handle_response(self, query, url, response, recurse=True):
    """Store the tweets of one search response and follow pagination.

    Args:
        query: the search query the response belongs to.
        url: the URL that produced ``response`` (used for logging).
        response: object exposing ``code`` and a JSON ``body`` that holds
            a ``"results"`` list and optionally a ``"next_page"`` entry.
        recurse: when true and the payload has a ``"next_page"`` entry,
            the next page (``self.url_base + next_page``) is requested.

    Raises:
        EnhanceYourCalmException: when ``response.code`` is 429.

    Other status codes are not special-cased: the body is parsed
    regardless, so a non-JSON error body raises from ``json.loads``.
    """
    logger.info("url: %s => response: %s", url, response.code)
    if response.code == 429:
        raise EnhanceYourCalmException(
            "fetch error : %s (%s)" % (response.code, response.body))

    data = json.loads(response.body)
    results = data["results"]
    results.sort(key=itemgetter("id"))
    last_id = results[-1]["id"] if results else None
    logger.info("got %d tweets (last tweet id: %s)", len(results), last_id)

    for result in results:
        self.store.append(query, Tweet.parse(result))

    if recurse and "next_page" in data:
        # The next page needs its own request.  Re-entering this method
        # with the same ``response`` would parse the same payload again
        # and recurse without end.
        # NOTE(review): assumes calling ``_fetch`` starts the request, as
        # the ``reactor.callLater`` retry in ``_fetch`` does - confirm.
        self._fetch(query, self.url_base + data["next_page"])
def listen_stream(self):
    """Consume the filtered tweet stream until it ends or a stop is requested.

    Opens a ``tweetstream.FilterStream`` with the two ``user_password``
    entries and ``track=self._queries``.  Every item is parsed with
    ``Tweet.parse``; tweets accepted by ``is_interesting_tweet`` are
    appended to ``ts_store`` under the ``'*'`` key.

    Raises:
        StopIteration: after the stream is left, when ``should_stop`` is
            set or the thread is no longer alive.
    """
    def stop_requested():
        # Shared by the in-loop check and the final guard.
        return self.should_stop or not self.is_alive()

    with tweetstream.FilterStream(self.user_password[0],
                                  self.user_password[1],
                                  track=self._queries) as stream:
        for raw_tweet in stream:
            candidate = Tweet.parse(raw_tweet)
            logger.info('got candidate tweet: %s', candidate.hashtags)
            if self.is_interesting_tweet(candidate):
                self.ts_store.append('*', candidate)
                logger.info('got interesting tweet: %s', candidate)
            if stop_requested():
                logger.info('stop command detected')
                break

    if stop_requested():
        raise StopIteration('should stop now !')
def __call__(self):
    """Log a wake-up line and call ``fetch`` once for each entry of ``queries``.

    The value returned by each ``fetch`` call is discarded.
    """
    logger.info("-- monitor awakened --")
    for current_query in self.queries:
        self.fetch(current_query)
def stop(self):
    """Set ``should_stop`` and wait up to one second for the thread to end."""
    logger.info('stream thread stop called')
    self.should_stop = True
    # Explicit base-class call, as before; the timeout keeps the caller
    # from blocking for more than a second.
    join_timeout = 1
    Thread.join(self, timeout=join_timeout)
from twisted.internet import reactor from varan import logger, VERSION from varan.ts_store import TSStore from varan.stream import Stream from varan.application import Application parser = argparse.ArgumentParser(description="varan : realtime twitter monitoring") parser.add_argument("--config", "-c", required=True) parser.add_argument("--user", "-u", required=True) if __name__ == "__main__": import sys logger.info("-" * 20 + " varan v.%s " % VERSION + "-" * 20) args = parser.parse_args() config = ConfigParser.ConfigParser() config.read(args.config) config.add_section("authentication") config.set("authentication", "password", args.user) store = TSStore(config) store.queries = [q.strip() for q in config.get("timeseries", "queries").split(",")] stream = Stream(config, store) try: # deferToThread(stream.__call__) stream.start()