class TweetStore(object):
    """Mongodb datastore for holding tweets.

    All reads and writes go through the MongoStore named "TweetStore".
    """

    def __init__(self):
        self._store = MongoStore("TweetStore")

    def put(self, tweet):
        """Write a tweet to the store.

        The saved document starts with an "_id" taken from the tweet's
        original "id" field and is then filled with every key of the
        original payload. Retweets are additionally flagged with
        "data_from_retweet".

        Args:
            tweet (twitterlize.tweet.Tweet) : Tweet object

        """
        payload = tweet.original_data
        record = {"_id": payload["id"]}
        # The payload is merged second, so any key it carries (including
        # "_id") overrides the seed value above.
        record.update(payload)
        if tweet.is_retweet:
            record["data_from_retweet"] = True
        self._store.save(record)

    def get(self, tweet_id):
        """Return the stored tweet wrapped in a Tweet, or None.

        None is returned whenever the store's find_one() result is falsy.
        """
        found = self._store.find_one(tweet_id)
        return Tweet(found) if found else None
def __init__(self):
    """Create the MongoStore named "TweetStore" and keep it on self._store."""
    backing_store = MongoStore("TweetStore")
    self._store = backing_store
def test_memcache_cache(self):
    """Walk a MongoStore configured with CacheType.Volatile through a
    save / read / update / delete cycle, checking memcache at each step.

    self._mongo and self._memcache are the handles supplied by the test
    fixture (set up outside this method).
    """
    settings.MongoStores["unittest"]["cachetype"] = CacheType.Volatile
    store = MongoStore("unittest")
    cache_key = "unittest:test"

    # Before anything is saved the cache handle answers "X" for the key
    # (presumably the fixture's miss sentinel; confirm in setUp).
    self.assertEqual("X", self._memcache.get(cache_key))

    # A save must land in mongo and in memcache (serialized form).
    store.save({"_id": "test", "hello": "world"})
    self.assertEqual(
        {"_id": "test", "hello": "world"},
        self._mongo.find_one("test"),
    )
    cached = self._memcache.get(cache_key)
    serialized = serialize({"_id": "test", "hello": "world"})
    self.assertEqual(serialized, cached)

    # Remove the document behind the store's back: mongo no longer has
    # it, yet the store is still expected to return the document.
    self._mongo.delete_one("test")
    self.assertEqual(None, self._mongo.find_one("test"))
    self.assertEqual(
        {"_id": "test", "hello": "world"},
        store.find_one("test"),
    )

    # Re-save, then update through the store: the cached entry must
    # reflect the updated field.
    store.save({"_id": "test", "hello": "world"})
    store.update({"_id": "test"}, {"$set": {"hello": "universe"}})
    cached = self._memcache.get(cache_key)
    serialized = serialize({"_id": "test", "hello": "universe"})
    self.assertEqual(serialized, cached)

    # Deleting through the store clears both the document and the
    # cache entry (the key answers "X" again).
    store.delete_one("test")
    self.assertEqual(None, store.find_one("test"))
    self.assertEqual("X", self._memcache.get(cache_key))
def test_no_cache(self):
    """Exercise save, find_one, find, count, delete_one and update on
    MongoStore("unittest").

    This test does not set a cachetype itself; it relies on whatever the
    "unittest" store settings are when it runs.
    """
    store = MongoStore("unittest")

    # save() is asserted to return the _id of the stored document.
    actual = store.save({"_id": "test", "hello": "world"})
    expected = "test"
    self.assertEqual(expected, actual)

    actual = store.find_one("test")
    expected = {"_id": "test", "hello": "world"}
    self.assertEqual(expected, actual)

    # With a second document present, find() is checked unfiltered,
    # filtered and limited.
    store.save({"_id": "test2", "goodbye": "world"})
    actual = list(store.find({}))
    expected = [
        {"_id": "test", "hello": "world"},
        {"_id": "test2", "goodbye": "world"},
    ]
    self.assertEqual(expected, actual)

    actual = list(store.find({"goodbye": {"$exists": True}}))
    expected = [{"_id": "test2", "goodbye": "world"}]
    self.assertEqual(expected, actual)

    actual = list(store.find().limit(1))
    expected = [{"_id": "test", "hello": "world"}]
    self.assertEqual(expected, actual)

    actual = store.count()
    self.assertEqual(2, actual)

    # After deleting "test" only "test2" may remain. The original
    # computed both values here but never compared them, so the delete
    # went unverified.
    store.delete_one("test")
    actual = list(store.find())
    expected = [{"_id": "test2", "goodbye": "world"}]
    self.assertEqual(expected, actual)

    # update() on an existing document.
    store.update({"_id": "test2"}, {"$set": {"goodbye": "universe"}})
    actual = store.find_one("test2")
    expected = {"_id": "test2", "goodbye": "universe"}
    self.assertEqual(expected, actual)

    # update() with upsert=True on a missing _id must create it.
    store.update({"_id": "test3"}, {"$set": {"goodbye": "multiverse"}}, upsert=True)
    actual = store.find_one("test3")
    expected = {"_id": "test3", "goodbye": "multiverse"}
    self.assertEqual(expected, actual)
class CountStore(object):
    """Store counts of incoming items such as tweets, hashtags or user mentions.

    Every counter document is keyed by a segmentation (i.e. a country), an
    entity_type (i.e. "hashtag" or "user mention"), an entity and a
    timeslice. Splitting the counts by timeslice allows a rolling tally
    over a restricted time period to be kept.
    """

    def __init__(self):
        self._store = MongoStore("CountStore")

    def put(self, entity_id, entity_type, segmentation, total_count=None):
        """Record one occurrence of an entity in every timeslice returned
        by get_timeslices().

        Without a (truthy) total_count the matching counter is simply
        incremented by one. With a total_count, the first write for a
        timeslice stores it as "base_count" with "count" 1, and later
        writes set "count" to total_count - base_count + 1.

        Raises:
            ValueError: a total_count was given but the existing document
                has no (truthy) "base_count". Timeslices processed before
                the failing one have already been written.
        """
        for timeslice in get_timeslices():
            selector = {
                "entity_id": entity_id,
                "entity_type": entity_type,
                "segmentation": segmentation,
                "timeslice": timeslice,
            }
            if not total_count:
                change = {"$inc": {"count": 1}}
            else:
                # Note - This read-then-write is non-atomic and so could
                # cause inaccuracies.
                # TODO: Implement locking
                matches = list(self._store.find(selector))
                if not matches:
                    change = {"$set": {"base_count": total_count, "count": 1}}
                else:
                    current = matches[0]
                    if not current.get("base_count"):
                        raise ValueError("Tried to increment doc with no base_count using total_count")
                    change = {
                        "$set": {
                            "count": total_count - current["base_count"] + 1
                        }
                    }
            self._store.update(selector, change, upsert=True)

    def get_top(self, entity_type, segmentation, num_to_get, timestamp=None):
        """Get the top entities in this segmentation.

        Returns a list of (entity_id, count) tuples, highest count first,
        holding at most num_to_get entries. A falsy timestamp means "now"
        (int(time.time())).
        """
        if not timestamp:
            timestamp = int(time.time())
        # The last element of get_timeslices(timestamp) is used; per the
        # original author's note this is the latest COMPLETE timeslice,
        # not the latest one.
        timeslice = get_timeslices(timestamp)[-1]
        query = {
            "entity_type": entity_type,
            "segmentation": segmentation,
            "timeslice": timeslice,
        }
        fields = {
            "entity_id": 1,
            "count": 1,
        }
        cursor = self._store.find(query, fields=fields)
        ranked = cursor.sort("count", -1).limit(num_to_get)
        return [(doc["entity_id"], doc["count"]) for doc in ranked]
def __init__(self):
    """Create the MongoStore named "CountStore" and keep it on self._store."""
    backing_store = MongoStore("CountStore")
    self._store = backing_store