Exemple #1
0
def broadcast_tweets():
    """Relay tweets from the default subscriber to the in-process publisher.

    Every message received is decoded as JSON, re-serialised with
    ``marshal`` and sent on ``inproc://twit`` (the endpoint the
    ``user_stream`` consumers subscribe to). Loops forever; never returns.
    """
    print("broadcast_tweets")
    source = Subscriber(context=ctx)
    relay = Publisher(url="inproc://twit", context=ctx)
    while True:
        raw = source.recv()
        tweet = json.loads(raw)
        # marshal is used as the in-process wire format between greenlets /
        # threads of this same interpreter -- presumably for speed.
        relay.send(marshal.dumps(tweet))
Exemple #2
0
def user_stream(lon, lat, web_sock):
    """Forward tweets located near (``lat``, ``lon``) to ``web_sock``.

    Subscribes to ``inproc://twit``, unmarshals each message and sends the
    tweet, JSON-encoded, through ``web_sock.send`` when the squared
    difference of coordinates is below a fixed threshold. Loops forever.

    :param lon: longitude of the point of interest.
    :param lat: latitude of the point of interest.
    :param web_sock: object exposing ``send(str)``.
    """
    feed = Subscriber(url="inproc://twit", context=ctx)
    # Threshold on the *squared* coordinate distance (i.e. 0.3 squared);
    # units are whatever lat/lon are expressed in -- presumably degrees.
    max_sq_dist = .09
    while True:
        tweet = marshal.loads(feed.recv())
        tweet_lat = tweet["location"]["lat"]
        tweet_lon = tweet["location"]["lon"]
        d_lat = lat - tweet_lat
        d_lon = lon - tweet_lon
        if d_lat ** 2 + d_lon ** 2 < max_sq_dist:
            web_sock.send(json.dumps(tweet))
Exemple #3
0
 def __init__(self, name, client=None, settings=None):
     """Store the index name/settings and prepare client, subscriber, buffer.

     :param name: index name kept on ``self.name``.
     :param client: search client to use; when falsy, an
         ``es.ElasticSearch`` pointed at the ``ES_URL`` config value
         (default ``http://localhost:9200``) is created instead.
     :param settings: kept on ``self.settings`` untouched.
     """
     self.name, self.settings = name, settings
     if not client:
         # No client injected: build one from configuration.
         client = es.ElasticSearch(
             urls=[cfg.get("ES_URL", "http://localhost:9200")])
     self.client = client
     self.sub = Subscriber()
     self._buffer = []
Exemple #4
0
class Indexer(object):
    """Buffer tweets read from a ``Subscriber`` and bulk-index them.

    Tweets are accumulated in memory and sent to the search client in
    batches of ``BATCH_SIZE`` documents of type ``"tweet"`` into the index
    called ``self.name``.

    NOTE(review): ``es`` looks like the ``pyelasticsearch`` package imported
    under an alias -- confirm against the file's import block.
    """

    # Number of buffered tweets that triggers a bulk request.
    BATCH_SIZE = 100

    def __init__(self, name, client=None, settings=None):
        """Create an indexer for the index ``name``.

        :param name: name of the index to create and write to.
        :param client: search client to use; when falsy, an
            ``es.ElasticSearch`` pointed at the ``ES_URL`` config value
            (default ``http://localhost:9200``) is created.
        :param settings: passed as-is to ``create_index`` by ``setup``.
        """
        self.name = name
        self.settings = settings
        self.client = client or es.ElasticSearch(
            urls=[cfg.get("ES_URL", "http://localhost:9200")])
        self.sub = Subscriber()
        self._buffer = []

    def setup(self):
        """Create the index if needed and install the ``tweet`` mapping.

        :returns: ``self``, so the call can be chained.
        """
        try:
            self.client.create_index(self.name, self.settings)
        except es.IndexAlreadyExistsError:
            # Deliberate best effort: an already existing index is fine,
            # the mapping below is (re)applied either way.
            pass
        # The mapping must target the index this instance writes to
        # (self.name), not a hard-coded index name.
        self.client.put_mapping(
            self.name,
            "tweet", {
                "tweet": {
                    "properties": {
                        "location": {"type": "geo_point"},
                        "text": {"type": "string"},
                        "timestamp": {"type": "integer"},
                    },
                    # Documents expire 15 minutes after indexing by default.
                    "_ttl": {
                        "enabled": True,
                        "default": "15m"
                    },
                    # Document id is taken from the tweet's "id" field.
                    "_id": {"path": "id"},
                }
            })
        return self

    def flush(self):
        """Send any buffered tweets in one bulk request and empty the buffer.

        Does nothing when the buffer is empty.
        """
        if not self._buffer:
            return
        self.client.bulk_index(self.name, "tweet", self._buffer)
        self._buffer = []

    def index(self, tweet):
        """Buffer ``tweet``; bulk-index the buffer once it is full.

        :param tweet: the document to index.
        """
        self._buffer.append(tweet)
        if len(self._buffer) >= self.BATCH_SIZE:
            self.flush()
            # Progress trace: the last tweet of each batch that was sent.
            print(tweet)

    def loop(self):
        """Index every JSON message received from the subscriber, forever."""
        while True:
            tweet = json.loads(self.sub.recv())
            self.index(tweet)