# NOTE(review): the statements down to the backoff cap are the tail of an
# error callback whose `def` line lies above this chunk. They read
# `status_code` and `self` from that enclosing method and must be placed at
# its body indentation. Their logic is unchanged here.
print("[ON ERROR] %s" % (status_code,))
if status_code == 420:
    print("Sleeping %s" % (self.backoff,))
    time.sleep(self.backoff)
    self.backoff *= 2  # Exponential backoff
    if self.backoff > 15 * 60:  # Don't sleep longer than 15 minutes
        self.backoff = 15 * 60


if __name__ == "__main__":
    # Script entry point: stream statuses matching the tracked terms forever,
    # reconnecting after a ChunkedEncodingError and aborting on anything else.
    from collections import Counter
    from load_terms import load_terms

    # Failures are tallied per exception class name. Exception instances hash
    # by identity, so keying on the instance itself would always report a
    # count of 1 and would keep every exception object alive.
    exception_catcher = Counter()
    stream = MyStreamer(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
    terms = load_terms()
    while True:
        try:
            stream.statuses.filter(track=terms)
        except ChunkedEncodingError as e:
            # Treated as recoverable: log, count, and let the loop call
            # filter() again.
            error_key = type(e).__name__
            exception_catcher[error_key] += 1
            print("[SCRAPER ERROR] %s %s" % (e, exception_catcher[error_key]))
        except Exception as e:
            # Anything else is fatal: log it, close the database connection
            # and re-raise with the original traceback intact.
            error_key = type(e).__name__
            exception_catcher[error_key] += 1
            print("[SCRAPER ERROR] %s %s" % (e, exception_catcher[error_key]))
            db.conn.close()
            raise
import ConfigParser
import os
import sqlite3
import time
from contextlib import closing
from urlparse import urlparse

import numpy as np

from kde import exp_decay
from load_terms import load_terms

# Directory containing this module; config and schema files are looked up
# relative to it.
here = os.path.dirname(__file__)

config = ConfigParser.ConfigParser()
config.read(os.path.join(here, 'connection.cfg'))

# Database file path: the `name` option of the [database] section, resolved
# relative to this module's directory.
DB_NAME = os.path.join(here, config.get('database', 'name'))

# Loaded once at import time.
tracked_terms = load_terms()


class DbApi(object):
    """Wrapper around a SQLite connection held in ``self.conn``."""

    def __init__(self, db_name=DB_NAME, conn=None):
        """Open (or adopt) a connection and prepare it for use.

        Args:
            db_name: Path of the SQLite database to open when ``conn`` is
                not supplied. Defaults to ``DB_NAME``.
            conn: An existing connection to use instead of opening one.

        If the ENTITIES table cannot be queried, ``schema.sql`` from this
        module's directory is executed on the connection. A one-argument
        SQL function ``decay`` is registered, which calls
        ``exp_decay(x, halflife=300)``.
        """
        if conn is None:
            conn = sqlite3.Connection(db_name)
        self.conn = conn

        # closing() guarantees the cursor is released even when the schema
        # script or the file read fails.
        with closing(conn.cursor()) as cursor:
            try:
                cursor.execute('SELECT 1 FROM ENTITIES')
            except sqlite3.OperationalError:
                # Raised by SQLite when the table does not exist; other
                # errors are no longer swallowed here.
                with open(os.path.join(here, 'schema.sql'), 'r') as f:
                    cursor.executescript(f.read())

        self.conn.create_function(
            "decay", 1, lambda x: exp_decay(x, halflife=300))

        # NOTE(review): presumably the time of the last flush; confirm
        # against the methods that update it.
        self.last_flushed = 0