Esempio n. 1
0
 def safe_listen(self):
     """Run the stream listener once, translating known tweetstream
     reconnect errors into back-off adjustments, then sleep for the
     current wait interval.

     Returns the exception instance when an unexpected exception was
     caught (including the StopIteration stop signal raised by
     listen_stream); otherwise returns None after sleeping.
     """
     abnormal_exception = None
     try:
         self.listen_stream()
     except tweetstream.ReconnectImmediatelyError as rie:
         # Mildest condition: adjust the wait with default arguments.
         logger.error('got %s in stream: %s',
                      type(rie),
                      rie)
         self.update_wait()
     except tweetstream.ReconnectLinearlyError as rie:
         # Presumably a linear back-off (amount only) — semantics live
         # in update_wait, which is not visible here.
         logger.error('got %s in stream: %s',
                      type(rie),
                      rie)
         self.update_wait(amount=1)
     except tweetstream.ReconnectExponentiallyError as rie:
         # Presumably an exponential back-off (amount plus factor).
         logger.error('got %s in stream: %s',
                      type(rie),
                      rie)
         self.update_wait(amount=1, factor=2)
     except Exception as e:
         # Anything else is abnormal; remember it so the finally block
         # can hand it back to the caller instead of sleeping.
         logger.error('got abnormal exception in stream: %s',e,exc_info=True)
         abnormal_exception = e
     finally:
         # Early return: on an abnormal exception, skip the back-off
         # bookkeeping and the sleep entirely.
         if abnormal_exception:
             return abnormal_exception
         # If the previous wait was recorded more than 15 minutes ago,
         # the stream has been healthy for a while: reset the back-off.
         if self.last_wait and time.time() - self.last_wait > 15*60:
             logger.info('resetting time wait (last wait: %s)',
                         self.last_wait)
             self.update_wait(reset=True)
         self.last_wait = time.time()
         logger.info("will sleep %s secs",self.wait_secs)
         time.sleep(self.wait_secs)
Esempio n. 2
0
 def log_request(self, handler):
     """Log one completed HTTP request: summary, headers, status, latency."""
     # request_time() is in seconds; report milliseconds in the log line.
     elapsed_ms = handler.request.request_time() * 1000.0
     logger.info('HTTP %s %s %s (%.2f ms)',
                 handler._request_summary(),
                 handler.request.headers,
                 handler.get_status(),
                 elapsed_ms)
Esempio n. 3
0
 def __call__(self):
     """Main loop of the stream thread.

     Repeatedly runs safe_listen() until the thread dies or a stop is
     requested.  safe_listen() returns the abnormal exception it caught;
     StopIteration is the cooperative stop signal raised by
     listen_stream, anything else just resumes the loop.
     """
     logger.info('stream thread main loop')
     while self.is_alive() and not self.should_stop:
         status = self.safe_listen()
         if status:
             # isinstance instead of `type(...) ==` (idiomatic type check);
             # logger.warning instead of the deprecated logger.warn alias.
             if isinstance(status, StopIteration):
                 logger.warning('got stop loop message, exiting stream listener')
                 return
             logger.warning('got exception, will resume loop')
Esempio n. 4
0
 def fetch(self, query):
     """Build and run the search-API request for *query*.

     Uses the incremental URL (since_id style) when a last tweet id is
     stored for the query, otherwise the first-query URL with recursion
     into paging disabled.  Yields the deferred result of _fetch
     (inlineCallbacks-style generator).
     """
     last_tweet_id = self.store.retrieve_last_tweet_id(query)
     logger.info("query: %s last query tweet: %s", query, last_tweet_id)
     # Dead `url = None` initializer removed: both branches assign url.
     if not last_tweet_id:
         url = self.first_query_url % {"query": urllib.quote(query)}
         yield self._fetch(query, url, recurse=False)
     else:
         url = self.update_query_url % {"query": urllib.quote(query), "last_tweet_id": last_tweet_id}
         yield self._fetch(query, url)
Esempio n. 5
0
 def _fetch(self, query, url, recurse=True):
     """Fetch *url* and dispatch the response to handle_response.

     inlineCallbacks-style generator: yields the cyclone_fetch deferred.
     On HTTP 429 (EnhanceYourCalmException) the same call is rescheduled
     5 seconds later; any other exception is logged and swallowed.
     """
     logger.info("fetching: %s", url)
     try:
         response = yield cyclone_fetch(url, headers=self.headers)
         self.handle_response(query, url, response, recurse=recurse)
     except EnhanceYourCalmException:
         # Rate-limited: retry the identical call after a short delay.
         # Pass the arguments directly instead of unpacking literal
         # *[...] / **{...} containers.
         logger.error("HTTP 429 Too Many Requests ; will retry call later")
         reactor.callLater(5, self._fetch, query, url, recurse=recurse)
     except Exception as e:
         logger.error("url fetch error: %s:%s", type(e), e, exc_info=True)
Esempio n. 6
0
 def handle_response(self, query, url, response, recurse=True):
     """Parse a search-API response, store its tweets, and follow paging.

     Raises EnhanceYourCalmException on HTTP 429 so the caller can
     reschedule; other non-200 responses are logged and abandoned
     (the original fell through and tried to json-parse an error body).
     """
     logger.info("url: %s => response: %s", url, response.code)
     if response.code != 200:
         if response.code == 429:
             raise EnhanceYourCalmException("fetch error : %s (%s)" % (response.code, response.body))
         # Any other failure: no usable JSON body, give up on this page.
         logger.error("fetch error : %s (%s)", response.code, response.body)
         return
     data = json.loads(response.body)
     results = data["results"]
     # Sort ascending by tweet id so the store sees tweets oldest-first.
     results.sort(key=itemgetter("id"))
     logger.info("got %d tweets (last tweet id: %s)", len(results), len(results) and results[-1]["id"] or None)
     for result in results:
         tweet = Tweet.parse(result)
         self.store.append(query, tweet)
     # BUG FIX: the original recursed into handle_response with the SAME
     # response object, which loops forever while "next_page" is present
     # and never retrieves the next page.  Actually fetch the next page.
     # (also: `in` instead of the deprecated dict.has_key)
     if recurse and "next_page" in data:
         new_url = self.url_base + data["next_page"]
         self._fetch(query, new_url)
Esempio n. 7
0
 def listen_stream(self):
     """Consume the twitter filter stream, storing interesting tweets.

     Checks the stop flag after every tweet; when a stop is requested,
     raises StopIteration as the cooperative stop signal for the caller
     (safe_listen / the thread main loop).
     """
     with tweetstream.FilterStream(self.user_password[0],
                                   self.user_password[1],
                                   track=self._queries) as stream:
         for tweet_data in stream:
             tweet = Tweet.parse(tweet_data)
             logger.info('got candidate tweet: %s', tweet.hashtags)
             if self.is_interesting_tweet(tweet):
                 self.ts_store.append('*',tweet)
                 # BUG FIX: this log line was outside the branch, so every
                 # candidate was reported as "interesting" even when it
                 # failed the filter and was not stored.
                 logger.info('got interesting tweet: %s', tweet)
             if self.should_stop or not self.is_alive():
                 logger.info('stop command detected')
                 break
     if self.should_stop or not self.is_alive():
         raise StopIteration('should stop now !')
Esempio n. 8
0
 def __call__(self):
     """Wake the monitor once and kick off a fetch for every configured query."""
     logger.info("-- monitor awakened --")
     # NOTE(review): fetch looks like an inlineCallbacks-style generator;
     # presumably calling it is enough to schedule the work — confirm the
     # decorator on fetch's definition.
     for q in self.queries:
         self.fetch(q)
Esempio n. 9
0
 def stop(self):
     """Request shutdown of the stream thread and join it briefly."""
     logger.info('stream thread stop called')
     # Raise the flag first so the listen loop can observe it, then wait
     # at most one second for the thread to wind down.
     self.should_stop = True
     Thread.join(self, timeout=1)
Esempio n. 10
0
from twisted.internet import reactor

from varan import logger, VERSION
from varan.ts_store import TSStore
from varan.stream import Stream
from varan.application import Application

parser = argparse.ArgumentParser(description="varan : realtime twitter monitoring")
parser.add_argument("--config", "-c", required=True)
parser.add_argument("--user", "-u", required=True)

if __name__ == "__main__":
    import sys

    logger.info("-" * 20 + " varan v.%s " % VERSION + "-" * 20)

    args = parser.parse_args()
    config = ConfigParser.ConfigParser()
    config.read(args.config)
    config.add_section("authentication")
    config.set("authentication", "password", args.user)

    store = TSStore(config)
    store.queries = [q.strip() for q in config.get("timeseries", "queries").split(",")]

    stream = Stream(config, store)

    try:
        # deferToThread(stream.__call__)
        stream.start()