def get_rsvp_by_event(self, urls, filename="rsvp_events"):
    '''
    Sum the yes-RSVP counts of past events for each group in urls.

    The pipeline matches past events of the current batch whose group
    urlname is in urls, optionally restricts them by "event.time" when a
    start or end date is set, and groups by group urlname summing
    "event.yes_rsvp_count". The result is written through a
    CursorFormatter; filename is recorded in self._files unless it is "-".
    '''
    agg = Agg(self._mdb.pastEventsCollection())

    batch_and_groups = {
        "batchID": self._batchID,
        "event.group.urlname": {"$in": urls},
    }
    agg.addMatch(batch_and_groups)

    date_bounded = self._start_date or self._end_date
    if date_bounded:
        agg.addRangeMatch("event.time", self._start_date, self._end_date)

    # One output row per group, keyed by urlname in "_id".
    agg.addGroup({
        "_id": "$event.group.urlname",
        "rsvp_count": {"$sum": "$event.yes_rsvp_count"},
    })

    if self._sorter:
        agg.addSort(self._sorter)
    if self._view:
        agg.create_view(self._mdb.database(), "rsvps_by_event_view")

    columns = ["_id", "rsvp_count"]
    formatter = CursorFormatter(agg.aggregate(), filename, self._format)
    formatter.output(fieldNames=columns, limit=self._limit)

    if filename == "-":
        return
    self._files.append(filename)
def get_attendees(self, urls, filename="attendees"):
    '''
    List attendee records for events of the groups in urls.

    Reads the attendees collection for the current batch, optionally
    restricted by "info.event.time", and projects one row per record with
    the member name and id, the event name, the group name and the event
    time. filename is recorded in self._files unless it is "-".
    '''
    agg = Agg(self._mdb.attendeesCollection())

    selection = {
        "batchID": self._batchID,
        "info.event.group.urlname": {"$in": urls},
    }
    agg.addMatch(selection)

    if self._start_date or self._end_date:
        agg.addRangeMatch("info.event.time",
                          self._start_date,
                          self._end_date)

    row_shape = {
        "_id": 0,
        "name": "$info.attendee.member.name",
        "id": "$info.attendee.member.id",
        "event": "$info.event.name",
        "group": "$info.event.group.name",
        "date": "$info.event.time",
    }
    agg.addProject(row_shape)

    # NOTE(review): unlike the sibling reports, self._sorter is not
    # applied here - confirm that is intentional.
    if self._view:
        agg.create_view(self._mdb.database(), "attendees_view")

    formatter = CursorFormatter(agg, filename, self._format)
    formatter.output(
        fieldNames=["name", "id", "event", "group", "date"],
        datemap=["date"],
        limit=self._limit,
    )

    if filename == "-":
        return
    self._files.append(filename)
def get_member_history(self, urls, filename=None):
    '''
    Report the member count of each group (URL) across batches.

    Unlike the single-batch reports, this looks at every batch id
    returned by Audit.get_valid_batch_ids(), giving a history of each
    group. The date range, when set, selects batches through the
    "timestamp" field.
    '''
    batch_ids = list(Audit(self._mdb).get_valid_batch_ids())

    agg = Agg(self._mdb.groupsCollection())

    # The range filter is added ahead of the batch/group match.
    if self._start_date or self._end_date:
        agg.addRangeMatch("timestamp", self._start_date, self._end_date)

    agg.addMatch({
        "batchID": {"$in": batch_ids},
        "group.urlname": {"$in": urls},
    })

    agg.addProject({
        "_id": 0,
        "timestamp": 1,
        "batchID": 1,
        "urlname": "$group.urlname",
        "count": "$group.members",
    })

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        # NOTE(review): get_groups also creates a view called
        # "groups_view" - confirm the shared name is intended.
        agg.create_view(self._mdb.database(), "groups_view")

    columns = ["timestamp", "batchID", "urlname", "count"]
    formatter = CursorFormatter(agg.aggregate(), filename, self._format)
    formatter.output(fieldNames=columns,
                     datemap=["timestamp"],
                     limit=self._limit)

    if filename == "-":
        return
    self._files.append(filename)
def get_rsvps(self, urls, filename=None):
    '''
    List RSVP records (attendee, group, event date, event name).

    Reads the attendees collection for the current batch and the groups
    in urls, optionally restricted by "info.event.time". One row is
    emitted per matching record; no per-attendee counting is done here.
    '''
    agg = Agg(self._mdb.attendeesCollection())

    wanted = {
        "batchID": self._batchID,
        "info.event.group.urlname": {"$in": urls},
    }
    agg.addMatch(wanted)

    if self._start_date or self._end_date:
        agg.addRangeMatch("info.event.time",
                          self._start_date,
                          self._end_date)

    agg.addProject({
        "_id": 0,
        "attendee": "$info.attendee.member.name",
        "group": "$info.event.group.urlname",
        "date": "$info.event.time",
        "event_name": "$info.event.name",
    })

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        agg.create_view(self._mdb.database(), "rsvps_view")

    columns = ["attendee", "group", "date", "event_name"]
    formatter = CursorFormatter(agg, filename, self._format)
    formatter.output(fieldNames=columns,
                     datemap=["date"],
                     limit=self._limit)

    if filename == "-":
        return
    self._files.append(filename)
def get_active_users(self, urls, filename=None):
    '''
    Count "yes" RSVPs per member name for the groups in urls.

    An active user is somebody who has RSVP'd "yes" to at least one
    event in the window. The window is the configured start/end date
    when either is set; otherwise it starts six months before the
    current UTC time.
    '''
    agg = Agg(self._mdb.attendeesCollection())

    agg.addMatch({
        "batchID": self._batchID,
        "info.event.group.urlname": {"$in": urls},
        "info.attendee.rsvp.response": "yes",
    })

    if self._start_date or self._end_date:
        window = (self._start_date, self._end_date)
    else:
        # Default window: only a lower bound, six months back from now.
        window = (datetime.utcnow() + relativedelta(months=-6),)
    agg.addRangeMatch("info.event.time", *window)

    agg.addProject({
        "_id": 0,
        "name": "$info.attendee.member.name",
        "event": "$info.event.name",
        "date": "$info.event.time",
    })

    # Prints the pipeline as it stands before the grouping stage.
    print(agg)

    agg.addGroup({"_id": "$name", "count": {"$sum": 1}})

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        agg.create_view(self._mdb.database(), "active_users_view")

    formatter = CursorFormatter(agg, filename, self._format)
    formatter.output(fieldNames=["_id", "count"], limit=self._limit)

    if filename == "-":
        return
    self._files.append(filename)
def get_events(self, urls, when="past", filename=None):
    '''
    Get events for the groups in urls.

    when="past" reads the past events collection; when="upcoming" reads
    the upcoming events collection. Events are matched on the current
    batch, on "event.status" == when and on group urlname, optionally
    restricted by "event.time".

    Raises:
        ValueError: if when is neither "past" nor "upcoming".
    '''
    # Validate up front: previously an unknown value left the aggregation
    # unset and failed later with an AttributeError on None.
    if when == "past":
        collection = self._mdb.pastEventsCollection()
    elif when == "upcoming":
        collection = self._mdb.upcomingEventsCollection()
    else:
        raise ValueError(
            'when must be "past" or "upcoming", got %r' % (when,))

    agg = Agg(collection)

    agg.addMatch({
        "batchID": self._batchID,
        "event.status": when,
        "event.group.urlname": {"$in": urls},
    })

    if self._start_date or self._end_date:
        agg.addRangeMatch("event.time", self._start_date, self._end_date)

    # "country" is projected but is not among the output columns below.
    agg.addProject({
        "_id": 0,
        "group": "$event.group.urlname",
        "name": "$event.name",
        "country": "$event.venue.country",
        "rsvp_count": "$event.yes_rsvp_count",
        "date": "$event.time",
    })

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        agg.create_view(self._mdb.database(), "events_view")

    formatter = CursorFormatter(agg, filename, self._format)
    formatter.output(fieldNames=["group", "name", "rsvp_count", "date"],
                     datemap=["date"],
                     limit=self._limit)

    if filename != "-":
        self._files.append(filename)
def get_RSVP_history(self, urls, filename=None):
    '''
    Report date and RSVP count for each past event of the groups (urls).

    Reads the past events collection for the current batch, optionally
    restricted by "event.time", and emits one row per event with its
    time, group urlname, name and yes-RSVP count.
    '''
    agg = Agg(self._mdb.pastEventsCollection())

    selection = {
        "batchID": self._batchID,
        "event.group.urlname": {"$in": urls},
    }
    agg.addMatch(selection)

    if self._start_date or self._end_date:
        agg.addRangeMatch("event.time", self._start_date, self._end_date)

    agg.addProject({
        "_id": 0,
        "timestamp": "$event.time",
        "urlname": "$event.group.urlname",
        "event": "$event.name",
        "rsvp_count": "$event.yes_rsvp_count",
    })

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        # NOTE(review): get_rsvps also creates a view called
        # "rsvps_view" - confirm the shared name is intended.
        agg.create_view(self._mdb.database(), "rsvps_view")

    columns = ["timestamp", "urlname", "event", "rsvp_count"]
    formatter = CursorFormatter(agg.aggregate(), filename, self._format)
    formatter.output(fieldNames=columns,
                     datemap=["timestamp"],
                     limit=self._limit)

    if filename == "-":
        return
    self._files.append(filename)
def get_organisers(self, urls, filename=None):
    '''
    Get the organisers of the groups (urls) and the groups each one runs.

    Reads the pro groups collection for the current batch, optionally
    restricted by "group.created", unwinds "group.organizers" and groups
    by organizer so that each organizer is listed once with the set of
    group urlnames they appear in.
    '''
    agg = Agg(self._mdb.proGroupsCollection())
    agg.addMatch({"batchID": self._batchID})
    agg.addMatch({"group.urlname": {"$in": urls}})

    if self._start_date or self._end_date:
        agg.addRangeMatch("group.created", self._start_date, self._end_date)

    agg.addUnwind("$group.organizers")

    # The whole organizer sub-document is the grouping key; its name and
    # permission fields are pulled back out in the projection below.
    agg.addGroup({
        "_id": "$group.organizers",
        "groups": {"$addToSet": "$group.urlname"},
    })

    agg.addProject({
        "_id": 0,
        "name": "$_id.name",
        "role": "$_id.permission",
        "groups": "$groups",
    })

    print(agg)

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        # Own view name: this method previously reused "new_members_view",
        # the name get_new_members gives to a different pipeline.
        agg.create_view(self._mdb.database(), "organisers_view")

    formatter = CursorFormatter(agg, filename, self._format)
    formatter.output(fieldNames=["name", "role", "groups"],
                     limit=self._limit)

    if filename != "-":
        self._files.append(filename)
def get_new_members(self, urls, filename=None):
    '''
    List members and their join dates for the groups (urls).

    Reads the pro members collection for the current batch. The date
    range, when set, is applied to "member.join_time". Each member is
    unwound over "member.chapters" and kept only for chapters whose
    urlname is in urls.
    '''
    agg = Agg(self._mdb.proMembersCollection())
    agg.addMatch({"batchID": self._batchID})

    if self._start_date or self._end_date:
        agg.addRangeMatch("member.join_time",
                          self._start_date,
                          self._end_date)

    # One document per (member, chapter) pair before filtering on group.
    agg.addUnwind("$member.chapters")
    agg.addMatch({"member.chapters.urlname": {"$in": urls}})

    row_shape = {
        "_id": 0,
        "group": "$member.chapters.urlname",
        "name": "$member.member_name",
        "join_date": "$member.join_time",
    }
    agg.addProject(row_shape)

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        agg.create_view(self._mdb.database(), "new_members_view")

    formatter = CursorFormatter(agg, filename, self._format)
    formatter.output(fieldNames=["group", "name", "join_date"],
                     datemap=["join_date"],
                     limit=self._limit)

    if filename == "-":
        return
    self._files.append(filename)
def get_members(self, urls, filename=None):
    '''
    Get a count of the members for each group in "urls".

    Range doesn't make sense here so it is not used. If supplied it is
    ignored.
    '''
    matcher = match({
        "batchID": {"$in": [self._batchID]},
        "group.urlname": {"$in": urls},
    })

    projector = project({
        "_id": 0,
        "urlname": "$group.urlname",
        "country": "$group.country",
        "batchID": 1,
        "members": "$group.members",
    })

    pipeline = Pipeline(matcher, projector)

    if self._sorter:
        # NOTE(review): get_group_totals appends sort(self._sorter)
        # rather than the raw sorter - confirm which form Pipeline expects.
        pipeline.append(self._sorter)

    # Run the pipeline built above (the name was misspelled "pipelinr"
    # here, which raised NameError on every call).
    formatter = CursorFormatter(
        pipeline.aggregate(self._mdb.groupsCollection()),
        filename,
        self._format)
    formatter.printCursor(
        fieldNames=["urlname", "country", "batchID", "members"])

    if filename != "-":
        self._files.append(filename)
def get_total_events(self, urls, when="past", filename=None):
    '''
    Count events per calendar month and year for the groups in urls.

    when="past" reads the past events collection; when="upcoming" reads
    the upcoming events collection. Events are optionally restricted by
    "event.time", matched on the current batch, "event.status" == when
    and group urlname, then grouped by the month and year of
    "event.time" with a count of events in each.

    If filename is given it replaces self._filename. The value returned
    by the formatter's output() is what gets recorded in self._files
    (unless it is "-").

    Raises:
        ValueError: if when is neither "past" nor "upcoming".
    '''
    # Validate up front: previously an unknown value left the aggregation
    # unset and failed later with an AttributeError on None.
    if when == "past":
        collection = self._mdb.pastEventsCollection()
    elif when == "upcoming":
        collection = self._mdb.upcomingEventsCollection()
    else:
        raise ValueError(
            'when must be "past" or "upcoming", got %r' % (when,))

    agg = Agg(collection)

    if self._start_date or self._end_date:
        agg.addRangeMatch("event.time", self._start_date, self._end_date)

    agg.addMatch({
        "batchID": self._batchID,
        "event.status": when,
        "event.group.urlname": {"$in": urls},
    })

    # "group" and "rsvp" are projected but not used by the grouping stage.
    # The rsvp path now matches the "$event.yes_rsvp_count" field used by
    # the other event reports (it previously pointed under event.group).
    agg.addProject({
        "_id": 0,
        "group": "$event.group.urlname",
        "rsvp": "$event.yes_rsvp_count",
        "month": {"$month": "$event.time"},
        "year": {"$year": "$event.time"},
    })

    agg.addGroup({
        "_id": {"month": "$month", "year": "$year"},
        "count": {"$sum": 1},
    })

    agg.addProject({
        "month": "$_id.month",
        "year": "$_id.year",
        "count": "$count",
    })

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        agg.create_view(self._mdb.database(), "total_events")

    if filename:
        self._filename = filename

    formatter = CursorFormatter(agg, self._filename, self._format)
    filename = formatter.output(fieldNames=["month", "year", "count"],
                                limit=self._limit)

    if filename != "-":
        self._files.append(filename)
def get_group_totals(self, urls, filename=None):
    '''
    Get the total number of events and RSVPs by group and year.

    Builds a Pipeline over the past events collection: match on the
    current batch, status "past" and group urlname; optionally restrict
    by "event.time"; group by (group urlname, year of event time)
    counting events and summing "event.yes_rsvp_count"; then flatten the
    grouping key into "group" and "year" columns.
    '''
    stages = Pipeline()

    selection = match({
        "batchID": self._batchID,
        "event.status": "past",
        "event.group.urlname": {"$in": urls},
    })
    stages.append(selection)

    if self._start_date or self._end_date:
        window = range_match("event.time",
                             self._start_date,
                             self._end_date)
        stages.append(window)

    per_group_year = group({
        "_id": {
            "urlname": "$event.group.urlname",
            "year": {"$year": "$event.time"},
        },
        "event_count": {"$sum": 1},
        "rsvp_count": {"$sum": "$event.yes_rsvp_count"},
    })
    stages.append(per_group_year)

    flattened = project({
        "_id": 0,
        "group": "$_id.urlname",
        "year": "$_id.year",
        "event_count": 1,
        "rsvp_count": 1,
    })
    stages.append(flattened)

    if self._sorter:
        stages.append(sort(self._sorter))

    stages.pp()

    cursor = stages.aggregate(self._mdb.pastEventsCollection())
    formatter = CursorFormatter(cursor, filename, self._format)
    formatter.output(
        fieldNames=["year", "group", "event_count", "rsvp_count"],
        limit=self._limit)

    if filename == "-":
        return
    self._files.append(filename)
def get_groups(self, urls, filename=None):
    '''
    Get all the groups listed by urls and their start dates.

    With self._pro_account set, the pro groups collection is used: the
    date range applies to "group.founded_date", and member count, last
    event and founding date come from the pro fields. Otherwise the
    plain groups collection is used: the range applies to
    "group.created" and "last_event" is the literal "not present".

    Returns the filename argument unchanged.
    '''
    using_pro = self._pro_account

    # Everything that differs between the two account types.
    if using_pro:
        agg = Agg(self._mdb.proGroupsCollection())
        range_field = "group.founded_date"
        members_expr = "$group.member_count"
        last_event_expr = "$group.last_event"
        founded_expr = "$group.founded_date"
    else:
        agg = Agg(self._mdb.groupsCollection())
        range_field = "group.created"
        members_expr = "$group.members"
        last_event_expr = "not present"
        founded_expr = "$group.created"

    agg.addMatch({
        "batchID": self._batchID,
        "group.urlname": {"$in": urls},
    })

    if self._start_date or self._end_date:
        agg.addRangeMatch(range_field, self._start_date, self._end_date)

    agg.addProject({
        "_id": 0,
        "urlname": "$group.urlname",
        "url": {"$concat": ["http://www.meetup.com/", "$group.urlname"]},
        "members": members_expr,
        "city": "$group.city",
        "country": "$group.country",
        "last_event": last_event_expr,
        "founded": founded_expr,
    })

    if not using_pro:
        print("Using nopro search")

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        agg.create_view(self._mdb.database(), "groups_view")

    columns = [
        "urlname", "url", "last_event", "members", "city", "country",
        "founded",
    ]
    formatter = CursorFormatter(agg, filename, self._format)
    formatter.output(fieldNames=columns,
                     datemap=["last_event", "founded"],
                     limit=self._limit)

    if filename != "-":
        self._files.append(filename)

    return filename