def broadcast_tweets():
    """Relay incoming tweets to every ``user_stream`` subscriber.

    Each message received from the upstream ``Subscriber`` is parsed as JSON,
    re-serialised with ``marshal`` and published on the in-process endpoint
    ``inproc://twit``. The loop never returns.
    """
    print("broadcast_tweets")
    source = Subscriber(context=ctx)
    fanout = Publisher(url="inproc://twit", context=ctx)
    while True:
        raw_message = source.recv()
        tweet = json.loads(raw_message)
        # marshal is used as the wire format on the in-process hop.
        fanout.send(marshal.dumps(tweet))
def user_stream(lon, lat, web_sock):
    """Forward tweets located near (``lon``, ``lat``) to ``web_sock``.

    Subscribes to the in-process endpoint ``inproc://twit``, decodes each
    marshalled tweet and sends it to ``web_sock`` as JSON when the squared
    coordinate distance between the tweet's ``location`` and the given point
    is below a fixed threshold. The loop never returns.

    :param lon: longitude of the point of interest
    :param lat: latitude of the point of interest
    :param web_sock: object with a ``send`` method that receives JSON text
    """
    feed = Subscriber(url="inproc://twit", context=ctx)
    # Squared radius (0.3 ** 2) in raw coordinate units; comparing squared
    # values avoids taking a square root for every tweet.
    max_sq_dist = .09
    while True:
        tweet = marshal.loads(feed.recv())
        where = tweet["location"]
        d_lat = lat - where["lat"]
        d_lon = lon - where["lon"]
        if d_lat ** 2 + d_lon ** 2 < max_sq_dist:
            web_sock.send(json.dumps(tweet))
def __init__(self, name, client=None, settings=None):
    """Store the index configuration and open the tweet subscription.

    :param name: index name kept on the instance as ``self.name``
    :param client: search client to use; when falsy, an
        ``es.ElasticSearch`` client is created for the URL found under
        ``ES_URL`` in ``cfg`` (default ``http://localhost:9200``)
    :param settings: index settings kept on the instance as
        ``self.settings``
    """
    self.name = name
    self.settings = settings
    if client:
        self.client = client
    else:
        es_url = cfg.get("ES_URL", "http://localhost:9200")
        self.client = es.ElasticSearch(urls=[es_url])
    self.sub = Subscriber()
    # Tweets collected so far and not yet written out.
    self._buffer = []
class Indexer(object):
    """Buffer tweets from a ``Subscriber`` and bulk-index them.

    Tweets are accumulated in memory and written with a single
    ``bulk_index`` call each time ``BATCH_SIZE`` of them have been collected.
    """

    # Number of buffered tweets that triggers a bulk write.
    BATCH_SIZE = 100

    def __init__(self, name, client=None, settings=None):
        """Store the index configuration and open the tweet subscription.

        :param name: name of the index to create and write into
        :param client: search client to use; when falsy, an
            ``es.ElasticSearch`` client is created for the URL found under
            ``ES_URL`` in ``cfg`` (default ``http://localhost:9200``)
        :param settings: settings passed to ``create_index`` by ``setup``
        """
        self.name = name
        self.settings = settings
        self.client = client or es.ElasticSearch(
            urls=[cfg.get("ES_URL", "http://localhost:9200")])
        self.sub = Subscriber()
        # Tweets collected so far and not yet written out.
        self._buffer = []

    def setup(self):
        """Create the index if needed and install the ``tweet`` mapping.

        :returns: ``self``, so the call can be chained
        """
        try:
            self.client.create_index(self.name, self.settings)
        except es.IndexAlreadyExistsError:
            # An existing index is fine: setup is meant to be repeatable.
            pass
        # The mapping must target the same index that create_index and
        # bulk_index use (self.name), not a hard-coded index name.
        self.client.put_mapping(
            self.name, "tweet", {
                "tweet": {
                    "properties": {
                        "location": {"type": "geo_point"},
                        "text": {"type": "string"},
                        "timestamp": {"type": "integer"},
                    },
                    # Documents expire after 15 minutes unless they carry
                    # their own TTL.
                    "_ttl": {
                        "enabled": True,
                        "default": "15m"
                    },
                    "_id": {"path": "id"},
                }
            })
        return self

    def flush(self):
        """Write all buffered tweets with one bulk request.

        Does nothing when the buffer is empty. The buffer is only cleared
        after ``bulk_index`` returns, so a failed write keeps the tweets.
        """
        if not self._buffer:
            return
        self.client.bulk_index(self.name, "tweet", self._buffer)
        self._buffer = []

    def index(self, tweet):
        """Queue ``tweet`` and flush once ``BATCH_SIZE`` tweets are buffered.

        :param tweet: decoded tweet document to index
        """
        self._buffer.append(tweet)
        if len(self._buffer) >= self.BATCH_SIZE:
            self.flush()
        print(tweet)

    def loop(self):
        """Receive JSON tweets from the subscription forever and index them."""
        while True:
            tweet = json.loads(self.sub.recv())
            self.index(tweet)