def get_rsvp_by_event(self, urls, filename="rsvp_events"):
    '''
    Sum the "yes" RSVP counts of past events, grouped by group urlname.

    Only documents whose batchID equals self._batchID and whose
    "event.group.urlname" is one of "urls" are considered. When a start
    and/or end date is configured, "event.time" is range-matched as well.

    The aggregation result is handed to a CursorFormatter built with
    "filename" and self._format. Unless "filename" is "-", it is also
    recorded in self._files.
    '''
    pipeline = Agg(self._mdb.pastEventsCollection())

    # Stage 1: restrict to the current batch and the requested groups.
    batch_and_groups = {
        "batchID": self._batchID,
        "event.group.urlname": {"$in": urls},
    }
    pipeline.addMatch(batch_and_groups)

    # Stage 2 (optional): restrict by event time.
    if self._start_date or self._end_date:
        pipeline.addRangeMatch("event.time", self._start_date,
                               self._end_date)

    # Stage 3: one output document per group with the RSVP total.
    rsvp_totals = {
        "_id": "$event.group.urlname",
        "rsvp_count": {"$sum": "$event.yes_rsvp_count"},
    }
    pipeline.addGroup(rsvp_totals)

    if self._sorter:
        pipeline.addSort(self._sorter)

    if self._view:
        pipeline.create_view(self._mdb.database(), "rsvps_by_event_view")

    writer = CursorFormatter(pipeline.aggregate(), filename, self._format)
    writer.output(fieldNames=["_id", "rsvp_count"], limit=self._limit)

    # "-" is not recorded as an output file.
    if filename == "-":
        return
    self._files.append(filename)
def get_member_history(self, urls, filename=None):
    '''
    Go into every valid batch and report the member count recorded for
    each group (URL) in "urls", giving a per-batch history of each group.

    The configured start/end dates, when present, select batches through
    the "timestamp" field. The output columns are timestamp, batchID,
    urlname and count.
    '''
    batch_ids = list(Audit(self._mdb).get_valid_batch_ids())

    pipeline = Agg(self._mdb.groupsCollection())

    if self._start_date or self._end_date:
        pipeline.addRangeMatch("timestamp", self._start_date,
                               self._end_date)

    # Keep only valid batches and the requested groups.
    wanted = {
        "batchID": {"$in": batch_ids},
        "group.urlname": {"$in": urls},
    }
    pipeline.addMatch(wanted)

    # Flatten the nested group fields into the report columns.
    columns = {
        "_id": 0,
        "timestamp": 1,
        "batchID": 1,
        "urlname": "$group.urlname",
        "count": "$group.members",
    }
    pipeline.addProject(columns)

    if self._sorter:
        pipeline.addSort(self._sorter)

    if self._view:
        pipeline.create_view(self._mdb.database(), "groups_view")

    writer = CursorFormatter(pipeline.aggregate(), filename, self._format)
    writer.output(
        fieldNames=["timestamp", "batchID", "urlname", "count"],
        datemap=["timestamp"],
        limit=self._limit)

    # NOTE(review): "filename" defaults to None and None != "-", so None is
    # appended to self._files when no filename is supplied -- confirm that
    # this is intended.
    if filename == "-":
        return
    self._files.append(filename)
def joined_by_year(self):
    '''
    Count the member documents of the last valid batch per join year.

    Returns whatever Agg.aggregate() yields for the pipeline; each
    grouped document has the year as "_id" and the number of members
    as "total_registered".
    '''
    pipeline = Agg(self._collection)

    # Only look at the most recent valid batch.
    latest_batch = {"batchID": self._audit.get_last_valid_batch_id()}
    pipeline.addMatch(latest_batch)

    # Reduce each member to id, name and the year part of join_time.
    member_year = {
        "_id": 0,
        "member_id": "$member.member_id",
        "member_name": "$member.member_name",
        "year": {"$year": "$member.join_time"},
    }
    pipeline.addProject(member_year)

    per_year = {"_id": "$year", "total_registered": {"$sum": 1}}
    pipeline.addGroup(per_year)

    return pipeline.aggregate()
def get_RSVP_history(self, urls, filename=None):
    '''
    Report the RSVP totals of past events for the groups in "urls",
    keyed by event time.

    Events are optionally range-matched on "event.time", projected to
    timestamp/event/country/rsvp_count, filtered to documents whose
    timestamp is of BSON type "date", and then summed per timestamp.

    A truthy "filename" replaces self._filename before output. The value
    returned by the formatter's output() call is what gets recorded in
    self._files (unless it is "-").
    '''
    pipeline = Agg(self._mdb.pastEventsCollection())

    pipeline.addMatch({"event.group.urlname": {"$in": urls}})

    if self._start_date or self._end_date:
        pipeline.addRangeMatch("event.time", self._start_date,
                               self._end_date)

    event_columns = {
        "timestamp": "$event.time",
        "event": "$event.name",
        "country": "$event.venue.country",
        "rsvp_count": "$event.yes_rsvp_count",
    }
    pipeline.addProject(event_columns)

    # Drop documents whose timestamp is not a date.
    pipeline.addMatch({"timestamp": {"$type": "date"}})

    totals_by_time = {
        "_id": "$timestamp",
        "rsvp_count": {"$sum": "$rsvp_count"},
    }
    pipeline.addGroup(totals_by_time)

    if self._sorter:
        pipeline.addSort(self._sorter)

    if filename:
        self._filename = filename

    if self._view:
        pipeline.create_view(self._mdb.database(), "rsvps_view")

    writer = CursorFormatter(pipeline.aggregate(), self._filename,
                             self._format)
    written_to = writer.output(fieldNames=["_id", "rsvp_count"],
                               datemap=["_id"],
                               limit=self._limit)

    if written_to == "-":
        return
    self._files.append(written_to)
def getMembers(self, urls, filename=None):
    '''
    Output the member count of each group in "urls" for the current
    batch (self._batchID).

    A date range does not make sense here, so the configured start/end
    dates are not applied. A truthy "filename" replaces self._filename
    before the formatter is created. No limit is passed to output().
    '''
    pipeline = Agg(self._mdb.groupsCollection())

    current_batch_groups = {
        "batchID": {"$in": [self._batchID]},
        "group.urlname": {"$in": urls},
    }
    pipeline.addMatch(current_batch_groups)

    columns = {
        "_id": 0,
        "urlname": "$group.urlname",
        "country": "$group.country",
        "batchID": 1,
        "member_count": "$group.member_count",
    }
    pipeline.addProject(columns)

    if self._sorter:
        pipeline.addSort(self._sorter)

    if filename:
        self._filename = filename

    if self._view:
        pipeline.create_view(self._mdb.database(), "members_view")

    writer = CursorFormatter(pipeline.aggregate(), self._filename,
                             self._format)
    writer.output(
        fieldNames=["urlname", "country", "batchID", "member_count"])
class Test(unittest.TestCase):
    '''
    Tests for CursorFormatter (file output, field mapping, date mapping)
    and for Nested_Dict.
    '''

    def setUp(self):
        '''Create a members-collection aggregation for each test.'''
        self._mdb = MUGAlyserMongoDB()
        self._agg = Agg(self._mdb.membersCollection())

    def tearDown(self):
        '''Nothing to clean up.'''

    def testFormatter(self):
        '''Formatter output should create <prefix><name>.<ext> on disk.'''
        self._agg.addMatch({"member.member_name": "Joe Drumgoole"})
        self._agg.addProject({
            "member.member_name": 1,
            "_id": 0,
            "member.join_time": 1,
            "member.city": 1,
        })

        prefix = "agg_test_"
        filename = "JoeDrumgoole"
        ext = "json"

        self._formatter = CursorFormatter(self._agg.aggregate(),
                                          prefix=prefix,
                                          name=filename,
                                          ext=ext)
        self._formatter.output(
            fieldNames=[
                "member.member_name", "member.join_time", "member.city"
            ],
            datemap=["member.join_time"])

        # The output file is checked and then removed again.
        expected_path = prefix + filename + "." + ext
        self.assertTrue(os.path.isfile(expected_path))
        os.unlink(expected_path)

    def testFieldMapper(self):
        '''fieldMapper should keep only the requested (dotted) fields.'''
        # Single top-level field.
        mapped = CursorFormatter.fieldMapper({"a": "b"}, ['a'])
        self.assertTrue(mapped.has_key("a"))

        # Subset of top-level fields.
        source = {"a": "b", "c": "d", "e": "f"}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c'])
        for kept in ("a", "c"):
            self.assertTrue(mapped.has_key(kept))
        self.assertFalse(mapped.has_key("e"))

        # Dotted path into a nested document with a single key.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))

        # Dotted path must not pull in sibling keys of the nested document.
        source = {"a": "b", "c": "d", "e": "f", "z": {"w": "x", "y": "p"}}
        mapped = CursorFormatter.fieldMapper(source, ['a', 'c', "z.w"])
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))

        # Two independent nested documents.
        source = {
            "a": "b",
            "c": "d",
            "e": "f",
            "z": {"w": "x", "y": "p"},
            "g": {"h": "i", "j": "k"},
        }
        mapped = CursorFormatter.fieldMapper(source,
                                             ['a', 'c', "z.w", "g.j"])
        for kept in ("a", "c", "z"):
            self.assertTrue(mapped.has_key(kept))
        self.assertTrue(mapped["z"].has_key("w"))
        self.assertFalse(mapped.has_key("e"))
        self.assertFalse(mapped['z'].has_key("y"))
        self.assertTrue(mapped.has_key("g"))
        self.assertTrue(mapped['g'].has_key("j"))
        self.assertFalse(mapped['g'].has_key("h"))

    def testNestedDict(self):
        '''Nested_Dict should support plain and dotted keys.'''
        nested = Nested_Dict({})
        self.assertFalse(nested.has_key("hello"))

        nested.set_value("hello", "world")
        self.assertTrue(nested.has_key("hello"))
        self.assertEqual(nested.get_value("hello"), "world")

        # Looking up a missing key raises KeyError.
        with self.assertRaises(KeyError):
            nested.get_value("Bingo")

        # A dotted key creates the intermediate document.
        nested.set_value("member.name", "Joe Drumgoole")
        self.assertEqual(nested.get_value("member"),
                         {"name": "Joe Drumgoole"})

    def test_dateMapField(self):
        '''dateMapField should accept a document with a datetime field.'''
        sample = {"a": 1, "b": datetime.datetime.now()}
        # Only checks that the call completes; the result is not inspected.
        CursorFormatter.dateMapField(sample, "b")
class Members(MUGData):
    '''
    Query helpers for the members collection.

    Lookups by name, ID and join date go through find()/find_one(), and
    aggregations are built with Agg over self._collection.
    NOTE(review): find, find_one, self._collection and self._audit are not
    defined in this class; presumably they come from MUGData -- confirm.
    '''

    def __init__(self, mdb, collection_name=None):
        '''
        Initialise the base class and pre-build the name-count aggregation.

        mdb: passed through to the base-class constructor.
        collection_name: collection to use; "members" when None.
        '''
        chosen = "members" if collection_name is None else collection_name
        super(Members, self).__init__(mdb, chosen)

        # Pipeline used by get_members(): count documents per member name,
        # largest count first.
        # NOTE(review): this pipeline reads "member.name" while the lookup
        # methods use "member.member_name" -- confirm which field is right.
        self._membersAgg = Agg(self._collection)

        has_name = {"member.name": {"$exists": 1}}
        self._membersAgg.addMatch(has_name)

        only_name = {"_id": 0, "name": "$member.name"}
        self._membersAgg.addProject(only_name)

        count_per_name = {"_id": "$name", "occurences": {"$sum": 1}}
        self._membersAgg.addGroup(count_per_name)

        largest_first = Sorter(occurences=pymongo.DESCENDING)
        self._membersAgg.addSort(largest_first)

        self._memberCount = 0

    def get_all_members(self, query=None):
        '''
        Return the result of find() for "query" (None by default).
        '''
        return self.find(query)

    def get_by_name(self, name):
        '''
        Return the "member" sub-document whose member_name equals "name",
        or None when no document matches.
        '''
        found = self.find_one({"member.member_name": name})
        return None if found is None else found["member"]

    def get_by_ID(self, member_id):
        '''
        Return the "member" sub-document whose member_id equals
        "member_id", or None when no document matches.
        '''
        found = self.find_one({"member.member_id": member_id})
        if found is not None:
            return found["member"]
        return found

    def get_by_join_date(self, start, end):
        '''
        Return the find() result for members whose join_time lies between
        "start" and "end", both bounds inclusive.
        '''
        joined_between = {"$gte": start, "$lte": end}
        return self.find({"member.join_time": joined_between})

    def joined_by_year(self):
        '''
        Count the members of the last valid batch per join year.

        Each grouped document has the year as "_id" and the count as
        "total_registered".
        '''
        pipeline = Agg(self._collection)

        latest_batch = {"batchID": self._audit.get_last_valid_batch_id()}
        pipeline.addMatch(latest_batch)

        member_year = {
            "_id": 0,
            "member_id": "$member.member_id",
            "member_name": "$member.member_name",
            "year": {"$year": "$member.join_time"},
        }
        pipeline.addProject(member_year)

        per_year = {"_id": "$year", "total_registered": {"$sum": 1}}
        pipeline.addGroup(per_year)

        return pipeline.aggregate()

    def distinct_members(self):
        '''
        Return the distinct values of "member.name" in the collection.
        '''
        return self._collection.distinct("member.name")

    def get_members(self):
        '''
        Run the name-count pipeline prepared in the constructor.
        '''
        return self._membersAgg.aggregate()

    def summary(self, doc):
        '''
        Format name, id and country of a member document as one string.
        '''
        fields = (doc["member"]["member_name"],
                  doc["member"]["member_id"],
                  doc["member"]["country"])
        return "name: %s, id: %s, country: %s" % fields

    def one_line(self, doc):
        '''
        Format name and id of a member document as one string.
        '''
        fields = (doc["member"]["member_name"], doc["member"]["member_id"])
        return "name : %s, id: %s" % fields