def __init__(self, mdb, output_filename="-", formatter="json", batchID=None, limit=None, view=None):
    """Store the reporting options and resolve which batch to report on.

    If ``batchID`` is None the audit's current valid batch ID is used.
    The chosen batch is then checked with ``Audit.isProBatch`` and the
    result cached in ``self._pro_account``.
    """
    self._mdb = mdb
    batch_audit = Audit(mdb)

    # Options supplied by the caller.
    self._filename = output_filename
    self._format = formatter
    self._limit = limit
    self._view = view

    # State that starts out empty.
    self._sorter = None
    self._start_date = None
    self._end_date = None
    self._files = []

    if batchID is not None:
        self._batchID = batchID
    else:
        self._batchID = batch_audit.getCurrentValidBatchID()

    self._pro_account = batch_audit.isProBatch(self._batchID)
def get_batches(mdb, start, end):
    """Print the batch ID and end time of each valid batch in [start, end]."""
    line_template = "BatchID :%i End : %s "
    for batch in Audit(mdb).get_valid_batches(start, end):
        batch_id = batch["batchID"]
        end_text = batch["end"].strftime("%d-%b-%Y %H:%M.%S")
        print(line_template % (batch_id, end_text))
def get_batches(mdb, start, end, limit=None):
    """Output batchID, end and start for the current valid batches in range.

    ``limit`` is passed straight through to ``CursorFormatter.output``.
    """
    valid_batches = Audit(mdb).getCurrentValidBatches(start, end)
    formatter = CursorFormatter(valid_batches)
    formatter.output(["batchID", "end", "start"],
                     datemap=["start", "end"],
                     limit=limit)
def test_write_group(self):
    """Write DublinMUG through both account kinds and check both collections."""
    self._mdb = MUGAlyserMongoDB("mongodb://localhost:27017/TESTWRITER")
    self._audit = Audit(self._mdb)
    batch_id = self._audit.start_batch({"test": 1})
    self._writer = MeetupWriter(get_meetup_key(), batch_id, self._mdb)

    for account_kind in ("nopro", "pro"):
        self._writer.write_groups(account_kind, ["DublinMUG"])

    standard_doc = self._mdb.groupsCollection().find_one(
        {"group.urlname": "DublinMUG"})
    self.assertTrue(standard_doc)

    pro_doc = self._mdb.proGroupsCollection().find_one(
        {"group.urlname": "DublinMUG"})
    self.assertTrue(pro_doc)

    self._audit.end_batch(batch_id)
def testProcessMembers(self):
    """Write DublinMUG members for both account kinds and look one up in each."""
    self._mdb = MUGAlyserMongoDB("mongodb://localhost:27017/TESTWRITER")
    self._audit = Audit(self._mdb)
    batch_id = self._audit.start_batch({"test": 2})
    self._writer = MeetupWriter(get_meetup_key(), batch_id, self._mdb)

    for account_kind in ("pro", "nopro"):
        self._writer.write_members(account_kind, ["DublinMUG"])

    # The pro collection is queried on "member_name", the standard one on "name".
    pro_member = self._mdb.proMembersCollection().find_one(
        {"member.member_name": "Joe Drumgoole"})
    self.assertTrue(pro_member)

    standard_member = self._mdb.membersCollection().find_one(
        {"member.name": "Joe Drumgoole"})
    self.assertTrue(standard_member)

    self._audit.end_batch(batch_id)
def get_member_history(self, urls, filename=None):
    '''
    Go into every valid batch and report the member count recorded for each
    group (URL) in ``urls``, giving a history of each group over time.

    If a start and/or end date has been set on this object, batches are
    additionally restricted via a range match on the "timestamp" field.

    :param urls: list of group urlnames to report on.
    :param filename: output target handed to ``CursorFormatter``. When a
        real filename is given (anything other than ``None`` or ``"-"``)
        it is recorded in ``self._files``.
    '''
    audit = Audit(self._mdb)
    validBatches = list(audit.get_valid_batch_ids())

    agg = Agg(self._mdb.groupsCollection())

    if self._start_date or self._end_date:
        agg.addRangeMatch("timestamp", self._start_date, self._end_date)

    agg.addMatch({
        "batchID": {"$in": validBatches},
        "group.urlname": {"$in": urls},
    })

    agg.addProject({
        "_id": 0,
        "timestamp": 1,
        "batchID": 1,
        "urlname": "$group.urlname",
        "count": "$group.members",
    })

    if self._sorter:
        agg.addSort(self._sorter)

    if self._view:
        agg.create_view(self._mdb.database(), "groups_view")

    formatter = CursorFormatter(agg.aggregate(), filename, self._format)
    formatter.output(
        fieldNames=["timestamp", "batchID", "urlname", "count"],
        datemap=["timestamp"],
        limit=self._limit)

    # Only remember real output files: the default filename=None used to be
    # appended here as a bogus ``None`` entry because only "-" was excluded.
    if filename and filename != "-":
        self._files.append(filename)
def __init__(self, mdb):
    '''
    Bind to the "members" collection and prepare the aggregation that
    counts how often each member name occurs (largest count first).
    '''
    super(Members, self).__init__(mdb, "members")

    name_counts = Agg(self._collection)
    name_counts.addMatch({"member.name": {"$exists": 1}})
    name_counts.addProject({"_id": 0, "name": "$member.name"})
    name_counts.addGroup({"_id": "$name", "occurences": {"$sum": 1}})
    # Descending sort puts the most frequent names first.
    name_counts.addSort(Sorter(occurences=pymongo.DESCENDING))
    self._membersAgg = name_counts

    self._memberCount = 0
    self._feedback = Feedback()
    self._audit = Audit(mdb)
class Test(unittest.TestCase):
    """MeetupWriter tests that write into the TESTWRITER database."""

    def tearDown(self):
        """Drop the TESTWRITER database after each test."""
        mongo_client = self._mdb.client()
        mongo_client.drop_database("TESTWRITER")

    def test_write_group(self):
        """Write DublinMUG through both account kinds and check both collections."""
        self._mdb = MUGAlyserMongoDB("mongodb://localhost:27017/TESTWRITER")
        self._audit = Audit(self._mdb)
        batch_id = self._audit.start_batch({"test": 1})
        self._writer = MeetupWriter(get_meetup_key(), batch_id, self._mdb)

        for account_kind in ("nopro", "pro"):
            self._writer.write_groups(account_kind, ["DublinMUG"])

        standard_doc = self._mdb.groupsCollection().find_one(
            {"group.urlname": "DublinMUG"})
        self.assertTrue(standard_doc)

        pro_doc = self._mdb.proGroupsCollection().find_one(
            {"group.urlname": "DublinMUG"})
        self.assertTrue(pro_doc)

        self._audit.end_batch(batch_id)

    def testProcessMembers(self):
        """Write DublinMUG members for both account kinds and look one up in each."""
        self._mdb = MUGAlyserMongoDB("mongodb://localhost:27017/TESTWRITER")
        self._audit = Audit(self._mdb)
        batch_id = self._audit.start_batch({"test": 2})
        self._writer = MeetupWriter(get_meetup_key(), batch_id, self._mdb)

        for account_kind in ("pro", "nopro"):
            self._writer.write_members(account_kind, ["DublinMUG"])

        # The pro collection is queried on "member_name", the standard one on "name".
        pro_member = self._mdb.proMembersCollection().find_one(
            {"member.member_name": "Joe Drumgoole"})
        self.assertTrue(pro_member)

        standard_member = self._mdb.membersCollection().find_one(
            {"member.name": "Joe Drumgoole"})
        self.assertTrue(standard_member)

        self._audit.end_batch(batch_id)
def __init__(self, mdb, formatter="json", batchID=None, limit=None, view=None):
    """Store the reporting options and resolve which batch to report on.

    If ``batchID`` is None the audit's last valid batch ID is used. The
    chosen batch is then checked with ``Audit.isProBatch`` and the result
    cached in ``self._pro_account``.
    """
    self._mdb = mdb
    batch_audit = Audit(mdb)

    # Options supplied by the caller.
    self._format = formatter
    self._limit = limit
    self._view = view

    # State that starts out empty.
    self._sorter = None
    self._start_date = None
    self._end_date = None
    self._files = []

    if batchID is not None:
        self._batchID = batchID
    else:
        self._batchID = batch_audit.get_last_valid_batch_id()

    self._pro_account = batch_audit.isProBatch(self._batchID)
# Application start-up: load secrets from local text files, connect to
# MongoDB and cache the collections and lookups used by the rest of the module.

# The first line of keys.txt is used as the Flask SECRET_KEY.
with open('keys.txt', 'r') as f:
    skey = f.readline().strip("\n")
app.config['SECRET_KEY'] = skey

# The first line of uri.txt is the MongoDB connection URI.
with open('uri.txt', 'r') as f:
    uri = f.readline().strip("\n")

try:
    print "Connecting to database..."
    mdb = MUGAlyserMongoDB(uri=uri)
except Exception as e:
    # Any failure with the configured URI falls back to the default
    # MUGAlyserMongoDB() constructor (reported below as "localhost").
    print "Error", e
    print "URI isn't valid, trying to run on localhost now"
    mdb = MUGAlyserMongoDB()

auditdb = Audit(mdb)
an = MUG_Analytics(mdb)

# Collection handles obtained once at import time.
membersCollection = mdb.membersCollection()
proMemCollection = mdb.proMembersCollection()
groupCollection = mdb.groupsCollection()
proGrpCollection = mdb.proGroupsCollection()
eventsCollection = mdb.pastEventsCollection()

# Last valid batch ID, read once at start-up.
currentBatch = auditdb.get_last_valid_batch_id()

# User and password-reset collections are read from the "MUGS" database
# on the same client connection.
connection = mdb.client()
db = connection.MUGS
userColl = db.users
resetColl = db.resets

# Group-name lists for the 'EU' and 'US' selectors, fetched once.
euList = an.get_group_names('EU')
usList = an.get_group_names('US')
class Members(MUGData):
    '''
    Query helpers over the "members" collection.

    Lookups go through the inherited ``find`` / ``find_one`` methods, so any
    batch scoping those methods apply also applies here.
    '''

    def __init__(self, mdb):
        '''
        Bind to the "members" collection and prepare the aggregation that
        counts how often each member name occurs (largest count first).
        '''
        super(Members, self).__init__(mdb, "members")
        self._membersAgg = Agg(self._collection)
        self._membersAgg.addMatch({"member.name": {"$exists": 1}})
        self._membersAgg.addProject({"_id": 0, "name": "$member.name"})
        self._membersAgg.addGroup({"_id": "$name", "occurences": {"$sum": 1}})
        self._membersAgg.addSort(
            Sorter(occurences=pymongo.DESCENDING))  # largest first
        self._memberCount = 0
        self._feedback = Feedback()
        self._audit = Audit(mdb)

    def get_group_members(self, url_name, q=None):
        '''
        Return a MongoDB cursor over the members whose "chapters" array
        contains a chapter with urlname ``url_name``.

        :param q: optional extra query terms merged into the chapter match.
        '''
        query = {"member.chapters": {"$elemMatch": {"urlname": url_name}}}
        if q:
            query.update(q)
        return self.find(query)

    def get_many_group_members(self, groups, query=None):
        '''
        Return an iterator chaining the member cursors of every group in
        ``groups`` (members belonging to several groups appear once per
        group).
        '''
        return itertools.chain(
            *[self.get_group_members(i, query) for i in groups])

    def count_members(self, groups):
        '''
        Return the total number of members across all ``groups``.

        Previously this returned the count of the *last* group only (and
        raised UnboundLocalError for an empty list); it now returns the
        accumulated total, which is 0 for an empty list.
        '''
        return sum(self.get_group_members(i).count() for i in groups)

    def get_all_members(self, query=None):
        '''
        Return a cursor over all members in the collection that match the
        optional ``query``.
        '''
        return self.find(query)

    def distinct_members(self):
        '''Return the distinct "member.member_name" values in the collection.'''
        return self._collection.distinct("member.member_name")

    def get_by_name(self, name):
        '''Return the "member" sub-document for ``name``, or None if not found.'''
        member = self.find_one({"member.member_name": name})
        if member is None:
            return None
        return member["member"]

    def get_by_ID(self, member_id):
        '''Return the "member" sub-document for ``member_id``, or None if not found.'''
        val = self.find_one({"member.member_id": member_id})
        if val is None:
            return None
        return val["member"]

    def get_by_join_date(self, start, end):
        '''Return a cursor over members whose join_time lies in [start, end].'''
        return self.find({"member.join_time": {"$gte": start, "$lte": end}})

    def joined_by_year(self):
        '''
        Aggregate the audit's current batch into one document per join year
        with a "total_registered" count.
        '''
        agg_pipe = Agg(self._collection)
        agg_pipe.addMatch({"batchID": self._audit.getCurrentBatchID()})
        agg_pipe.addProject({
            "_id": 0,
            "member_id": "$member.member_id",
            "member_name": "$member.member_name",
            "year": {
                "$year": "$member.join_time"
            },
        })
        agg_pipe.addGroup({"_id": "$year", "total_registered": {"$sum": 1}})
        return agg_pipe.aggregate()

    def get_members(self):
        '''Run the name-occurrence aggregation prepared in ``__init__``.'''
        return self._membersAgg.aggregate()

    def summary(self, doc):
        '''Return a one-line name / id / country description of ``doc``.'''
        return "name: %s, id: %s, country: %s" % (doc["member"]["member_name"],
                                                  doc["member"]["member_id"],
                                                  doc["member"]["country"])

    def one_line(self, doc):
        '''Return a one-line name / id description of ``doc``.'''
        return "name : %s, id: %s" % (doc["member"]["member_name"],
                                      doc["member"]["member_id"])
def setUp(self):
    """Connect to the TEST_AUDIT database and create the Audit under test."""
    test_db = MUGAlyserMongoDB(uri="mongodb://localhost/TEST_AUDIT")
    self._mdb = test_db
    self._audit = Audit(test_db)
class Test_audit(unittest.TestCase):
    """Tests for the Audit batch bookkeeping, run against the TEST_AUDIT database."""

    def setUp(self):
        """Connect to TEST_AUDIT and create the Audit object under test."""
        self._mdb = MUGAlyserMongoDB( uri="mongodb://localhost/TEST_AUDIT" )
        self._audit = Audit( self._mdb )

    def tearDown(self):
        """Drop TEST_AUDIT so every test starts from an empty database."""
        self._mdb.client().drop_database( "TEST_AUDIT" )

    #@unittest.skip
    def test_get_current_batch_id(self):
        """A started-then-ended batch is retrievable and becomes the last valid batch."""
        self.assertFalse( self._audit.in_batch())
        batch_id = self._audit.start_batch( doc = { "test" : "doc"})
        self.assertTrue( self._audit.in_batch())
        self._audit.end_batch( batch_id )
        self.assertTrue( self._audit.get_batch( batch_id ))
        self.assertFalse( self._audit.in_batch())
        self.assertEqual( batch_id, self._audit.get_last_valid_batch_id())

    def test_get_valid_batches(self):
        """Two overlapping batches both appear as valid once both are ended."""
        id1 = self._audit.start_batch( doc = { "test" : "doc"})
        id2 = self._audit.start_batch( doc = { "test" : "doc"})
        self.assertTrue( self._audit.in_batch())
        self._audit.end_batch( id2 )
        # id1 is still open, so we are still inside a batch.
        self.assertTrue( self._audit.in_batch())
        self._audit.end_batch( id1 )
        batch = self._audit.get_batch_end( id1 )
        # Sanity check only: the recorded end time is not before 1-Jun-2017.
        self.assertGreaterEqual( batch[ 'end'], parse( "1-Jun-2017", ) )
        self.assertFalse( self._audit.in_batch())
        idlist = list( self._audit.get_valid_batch_ids())
        self.assertTrue( id1 in idlist )
        self.assertTrue( id2 in idlist )

    def test_get_last_batch_id(self):
        """The last batch ID follows batch starts and is unaffected by batch ends."""
        id1 = self._audit.start_batch( doc = { "test" : "doc"})
        id2 = self._audit.start_batch( doc = { "test" : "doc"})
        # NOTE(review): the expected 101 presumably means IDs start at 100 in
        # an empty database -- confirm against Audit.
        self.assertEqual( 101, self._audit.get_last_batch_id())
        self._audit.end_batch( id2 )
        self.assertEqual( 101, self._audit.get_last_batch_id())
        self._audit.end_batch( id1 )
        id1 = self._audit.start_batch( doc = { "test" : "doc"})
        self.assertEqual( 102, self._audit.get_last_batch_id())
        self._audit.end_batch( id1 )

    def test_pro_batch_id(self):
def test_groups(self):
    """get_groups writes its output file and the analytics batch is a pro batch."""
    output_path = self._analytics.get_groups(["DublinMUG"], "test_groups.json")
    self.assertTrue(os.path.isfile(output_path))

    batch_audit = Audit(self._mdb)
    analytics_batch = self._analytics.get_batch_ID()
    self.assertTrue(batch_audit.isProBatch(analytics_batch))

    os.unlink(output_path)
'''
Created on 4 Oct 2016

@author: jdrumgoole
'''
from mugalyser.mongodb import MUGAlyserMongoDB
from mugalyser.audit import Audit
from flask import Flask, jsonify
from flask.templating import render_template

# Module-level Flask app and MongoDB handles, created once at import time.
app = Flask(__name__)
mdb = MUGAlyserMongoDB()
auditdb = Audit(mdb)
membersCollection = mdb.membersCollection()
groupCollection = mdb.groupsCollection()
auditCollection = auditdb.auditCollection()


def currentBatch():
    '''
    Return the audit document named "Current Batch" (without its _id)
    as a JSON response.
    '''
    curBatch = auditCollection.find_one({"name": "Current Batch"}, {"_id": 0})
    return jsonify(curBatch)


@app.route('/')
def index():
    '''Placeholder landing page.'''
    #return currentBatch()
    return "WIP"


@app.route('/groups')
def main(argv=None):
    '''
    Command line entry point for querying the MUGS database.

    Parses the options below, connects to MongoDB at ``--host`` and runs
    every report that was requested (the options are not mutually
    exclusive).

    :param argv: optional extra arguments; they are appended to
        ``sys.argv`` before parsing.
    '''
    if argv:
        # NOTE(review): this mutates the process-wide sys.argv rather than
        # parsing ``argv`` on its own; kept as-is for existing callers.
        sys.argv.extend(argv)

    try:
        parser = ArgumentParser()

        parser.add_argument(
            "--host",
            default="mongodb://localhost:27017/MUGS",
            help="URI for connecting to MongoDB [default: %(default)s]")
        parser.add_argument("--hasgroup", nargs="+", default=[],
                            help="Is this a MongoDB Group")
        parser.add_argument("-l", "--listgroups", action="store_true",
                            default=False, help="print out all the groups")
        parser.add_argument("-v", "--version", action="store_true",
                            default=False, help="print version")
        parser.add_argument("--members", nargs="+", default=[],
                            help="list all members of a list of groups")
        parser.add_argument("--distinct", action="store_true", default=False,
                            help="List all distinct members")
        parser.add_argument("-i", "--memberid", type=int,
                            help="get info for member id")
        # Help text used to be a copy of the --memberid one.
        parser.add_argument("--membername",
                            help="get info for member name")
        parser.add_argument("--upcomingevents", nargs="+", default=[],
                            help="List upcoming events")
        parser.add_argument("--pastevents", nargs="+", default=[],
                            help="List past events")
        parser.add_argument("--country", nargs="+", default=[],
                            help="print groups by country")
        parser.add_argument(
            "--batches",
            action="store_true",
            default=False,
            help="List all the batches in the audit database [default: %(default)s]")
        parser.add_argument("--curbatch", action="store_true", default=False,
                            help="Report current batch ID")
        parser.add_argument("--joined", action="store_true", default=False,
                            help="Report people who joined by year")
        parser.add_argument("--organizer", nargs="+", default=[],
                            help="List organizers for a specific set of MUGS")
        parser.add_argument(
            "--start", help="Range used for fields in which ranges relevant")
        parser.add_argument(
            "--finish", help="Range used for fields in which ranges relevant")
        parser.add_argument("-f", "--format_type",
                            choices=["oneline", "summary", "full"],
                            default="oneline", help="type of output")

        # Process arguments
        args = parser.parse_args()

        mdb = MUGAlyserMongoDB(uri=args.host)
        members = Members(mdb)

        if args.version:
            print("%s muginfo %s" % (__programName__, __version__))
            sys.exit(2)

        if args.curbatch:
            audit = Audit(mdb)
            curbatch = audit.getCurrentValidBatchID()
            print("current batch ID = {'batchID': %i}" % curbatch)

        if args.memberid:
            member = members.get_by_ID(args.memberid)
            if member:
                pprint(member)
                print(member["member_name"])
            else:
                print("No such member: %s" % args.memberid)

        if args.membername:
            member = members.find_one({"member.member_name": args.membername})
            if member:
                pprint(member)
            else:
                print("No such member: %s" % args.membername)

        for i in args.hasgroup:
            groups = Groups(mdb)
            if groups.get_group(i):
                print("{:40} :is a MongoDB MUG".format(i))
            else:
                print("{:40} :is not a MongoDB MUG".format(i))

        if args.listgroups:
            groups = Groups(mdb)
            count = 0
            for g in groups.get_all_groups():
                count = count + 1
                print("{:40} (location: {})".format(g["group"]["urlname"],
                                                    g["group"]["country"]))
            print("total: %i" % count)

        if args.country:
            count = 0
            groups = Groups(mdb)
            # --country is nargs="+", so args.country is a list. Using the
            # list as an equality filter does not match a string country
            # field; $in matches any of the requested countries.
            country_groups = groups.find(
                {"group.country": {"$in": args.country}})
            for g in country_groups:
                count = count + 1
                # Report the country this group actually matched on rather
                # than the whole list of requested countries.
                print("{:20} has MUG: {}".format(g["group"]["urlname"],
                                                 g["group"]["country"]))
            print("total : %i " % count)

        if args.batches:
            # --host has a default, so this guard only fires for --host "".
            if not args.host:
                print("Need to specify --host for batchIDs")
                sys.exit(2)
            audit = Audit(mdb)
            batchIDs = audit.getBatchIDs()
            for b in batchIDs:
                print(b)

        count = 0
        if args.members:
            print("args.members : %s" % args.members)
            q = Query()
            if args.start and not args.finish:
                q.add_range("member.join_time", parse(args.start),
                            datetime.now())
            elif not args.start and args.finish:
                # NOTE(review): with only --finish the range runs from "now"
                # to the finish date -- confirm this is intended.
                q.add_range("member.join_time", datetime.now(),
                            parse(args.finish))
            elif args.start and args.finish:
                q.add_range("member.join_time", parse(args.start),
                            parse(args.finish))

            if "all" in args.members:
                it = members.get_all_members(q)
            else:
                # NOTE(review): the date-range query ``q`` is not applied
                # when specific groups are listed -- confirm whether it
                # should be passed through.
                it = members.get_many_group_members(args.members)

            for i in it:
                count = count + 1
                # Sometimes country is not defined.
                country = i["member"].pop("country", "Undefined")
                if "member_id" in i["member"]:  # PRO API member format
                    print(u"{:30}, {:20}, {:20}".format(
                        i["member"]["member_name"], country,
                        i["member"]["member_id"]))
                else:
                    print(u"{:30}, {:20}, {:20}".format(
                        i["member"]["name"], country, i["member"]["id"]))
            print("%i total" % count)

        if args.joined:
            members = Members(mdb)
            joined = members.joined_by_year()
            for i in joined:
                print(i)

        if args.distinct:
            members = Members(mdb)
            distinct = members.distinct_members()
            printCount(distinct)

        if args.upcomingevents:
            events = UpcomingEvents(mdb)
            events.count_print(
                events.get_all_group_events(args.upcomingevents),
                args.format_type)

        if args.pastevents:
            events = PastEvents(mdb)
            events.count_print(events.get_all_group_events(args.pastevents),
                               args.format_type)

        if "all" in args.organizer:
            organizers = Organizers(mdb)
            members = organizers.get_organizers()
            organizers.count_print(members, args.format_type)
        else:
            organizers = Organizers(mdb)
            for i in args.organizer:
                print("Organizer: '%s'" % i)
                mugs = organizers.get_mugs(i)
                for m in mugs:
                    print("\t%s" % m["urlname"])

    except KeyboardInterrupt:
        print("Keyboard interrupt : Exiting...")
        sys.exit(2)
    # "except X as e" is valid on Python 2.6+ and Python 3; the old
    # "except X, e" form is a syntax error on Python 3.
    except pymongo.errors.ServerSelectionTimeoutError as e:
        print("Failed to connect to MongoDB Server (server timeout): %s" % e)
        sys.exit(2)
class Test_audit(unittest.TestCase):
    """Tests for Audit batch-ID bookkeeping, run against the TEST_AUDIT database."""

    def setUp(self):
        """Connect to TEST_AUDIT and create the Audit object under test."""
        self._mdb = MUGAlyserMongoDB(uri="mongodb://localhost/TEST_AUDIT")
        self._audit = Audit(self._mdb)

    def tearDown(self):
        """Drop TEST_AUDIT so every test starts from an empty database."""
        self._mdb.client().drop_database("TEST_AUDIT")

    #@unittest.skip
    def test_incrementID(self):
        """incrementBatchID returns the new current ID and increases by one."""
        batchID = self._audit.incrementBatchID()
        curID = self._audit.getCurrentBatchID()
        self.assertEqual(batchID, curID)
        newID = self._audit.incrementBatchID()
        self.assertEqual(batchID + 1, newID)

    def test_getCurrentValidBatchID(self):
        """Trial batches must not change the current valid batch ID."""
        batchID1 = self._audit.startBatch(doc={"test": "doc"}, trial=True)
        self._audit.endBatch(batchID1)
        #self.assertRaises( ValueError, self._audit.getCurrentValidBatchID )

        batchID2 = self._audit.startBatch(
            {
                "args": "arg list",
                "version": __programName__ + " " + __version__
            },
            trial=False,
            apikey=get_meetup_key())
        self._audit.endBatch(batchID2)
        self.assertEqual(batchID2, self._audit.getCurrentValidBatchID())

        # A later trial batch leaves the non-trial batch as the valid one.
        batchID3 = self._audit.startBatch(doc={"test": "doc"}, trial=True)
        self._audit.endBatch(batchID3)
        self.assertEqual(batchID2, self._audit.getCurrentValidBatchID())

    def test_batch(self):
        """Starting a batch adds exactly one new ID to getBatchIDs()."""
        batchIDs = list(self._audit.getBatchIDs())
        thisBatchID = self._audit.startBatch(doc={"test": "doc"}, trial=True)
        newBatchIDs = list(self._audit.getBatchIDs())

        self.assertEqual(len(batchIDs) + 1, len(newBatchIDs))
        self.assertIn(thisBatchID, newBatchIDs)
        self._audit.endBatch(thisBatchID)

    #@unittest.skip
    def test_IDs(self):
        """IDs are unavailable before the first batch, then count up from 1."""
        self.assertRaises(ValueError, self._audit.getCurrentBatchID)
        self.assertRaises(ValueError, self._audit.getLastBatchID)
        self.assertFalse(self._audit.inBatch())

        batchID = self._audit.startBatch({})
        self.assertTrue(self._audit.inBatch())
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(1, self._audit.getCurrentBatchID())
        self._audit.endBatch(batchID)

        batch = self._audit.getBatch(batchID)
        self.assertIn("start", batch)
        self.assertIn("end", batch)
        self.assertIn("info", batch)
        self.assertIn("batchID", batch)
        self.assertFalse(self._audit.incomplete(batchID))

        batchID = self._audit.startBatch({})
        self.assertTrue(self._audit.inBatch())
        self.assertEqual(2, self._audit.getCurrentBatchID())
        self._audit.endBatch(batchID)
        self.assertFalse(self._audit.inBatch())

    #@unittest.skip
    def test_start_end_batch(self):
        """A batch is incomplete between startBatch and endBatch only."""
        batchID = self._audit.startBatch({})
        self.assertTrue(self._audit.incomplete(batchID))
        self._audit.endBatch(batchID)
        self.assertFalse(self._audit.incomplete(batchID))
def setUp(self):
    """Connect to TESTMUGS and build a writer for the postgresqlrussia group."""
    test_db = MUGAlyserMongoDB("mongodb://localhost:27017/TESTMUGS")
    self._mdb = test_db

    batch_audit = Audit(test_db)
    self._audit = batch_audit

    group_urls = ["postgresqlrussia"]
    self._writer = MeetupWriter(batch_audit, test_db, group_urls)
def __init__(self, mdb, collection_name):
    """Keep the database handle, an Audit for it, and the named collection."""
    self._mdb = mdb
    batch_audit = Audit(mdb)
    self._audit = batch_audit
    named_collection = mdb.make_collection(collection_name)
    self._collection = named_collection
class MUGData(object):
    """
    Base class for collection wrappers whose queries are scoped to the last
    valid batch.

    ``find`` and ``find_one`` always add a ``batchID`` term taken from
    ``Audit.get_last_valid_batch_id()``. Subclasses override ``summary``,
    ``one_line`` and ``full`` to control how documents are printed.
    """

    def __init__(self, mdb, collection_name):
        """Keep the database handle, an Audit for it, and the named collection."""
        self._mdb = mdb
        self._audit = Audit(mdb)
        self._collection = mdb.make_collection(collection_name)

    def collection(self):
        """Return the underlying collection object."""
        return self._collection

    @staticmethod
    def filter(cursor, selector, values):
        """Yield each document in ``cursor`` whose ``selector`` field is in ``values``."""
        for i in cursor:
            if i[selector] in values:
                yield i

    def find_one(self, query=None):
        """Return one document from the last valid batch matching ``query``.

        Terms in ``query`` are merged over the batch filter.
        """
        batch_query = {"batchID": self._audit.get_last_valid_batch_id()}
        if query is not None:
            batch_query.update(query)
        return self._collection.find_one(batch_query)

    def find(self, q=None, *args, **kwargs):
        """Return a cursor over documents in the last valid batch matching ``q``.

        Extra positional and keyword arguments are forwarded to the
        collection's ``find``. They used to be passed as a raw tuple and
        dict (``find(query, args, kwargs)``), so they arrived as the wrong
        parameters; they are now unpacked.

        :rtype: whatever the underlying collection's ``find`` returns.
        """
        query = {"batchID": self._audit.get_last_valid_batch_id()}
        if q:
            query.update(q)
        return self._collection.find(query, *args, **kwargs)

    def count(self, g):
        """Return the number of items produced by iterable ``g`` (consumes it)."""
        return sum(1 for _ in g)

    def generator(self, cursor):
        """Yield every item of ``cursor`` in order."""
        for i in cursor:
            yield i

    def summary(self, doc):
        """Return a summary string for ``doc``; overridden by subclasses."""
        pass

    def one_line(self, doc):
        """Return a one-line string for ``doc``; overridden by subclasses."""
        pass

    def full(self, doc):
        """Return a full description of ``doc``; overridden by subclasses."""
        pass

    def doc_print(self, doc, format_type=None):
        """Print ``doc`` as "summary", "oneline", or pretty-printed otherwise."""
        if format_type == "summary":
            print(self.summary(doc))
        elif format_type == "oneline":
            print(self.one_line(doc))
        else:
            pprint.pprint(doc)

    def count_print(self, iterator, format_type=None):
        """Print every document from ``iterator`` followed by a total count."""
        count = 0
        for i in iterator:
            count = count + 1
            self.doc_print(i, format_type)
        print("Total: %i" % count)
def main(argv=None):  # IGNORE:C0111
    '''Command line options.

    Parse the command line, open an audit batch, write groups, members and
    the remaining phases for every MUG in the list, then close the batch.

    :param argv: optional extra arguments; they are appended to
        ``sys.argv`` before parsing.
    '''
    if argv:
        # NOTE(review): this mutates the process-wide sys.argv rather than
        # parsing ``argv`` on its own; kept as-is for existing callers.
        sys.argv.extend(argv)

    try:
        # Setup argument parser
        parser = ArgumentParser(description='''
Read data from the Meetup API and write it do a MongoDB database. Each run of this program
creates a new batch of data identified by a batchID. The default database is MUGS. You can change
this by using the --host parameter and specifying a different database in the mongodb URI.
If you use the --pro arguement your API key must be a meetup pro account API key. If not the api
calls to the pro interface will fail.

If you are and adminstrator on the pro account you should use the --admin flag to give you
access to the admin APIs.
''')
        #
        # MongoDB Args
        parser.add_argument('--host',
                            default="mongodb://localhost:27017/MUGS",
                            help='URI to connect to : [default: %(default)s]')
        parser.add_argument("--verbose", dest="verbose", action="count",
                            help="set verbosity level [default: %(default)s]")
        parser.add_argument("-v", "--version", action='version',
                            version=__programName__ + " " + __version__)
        parser.add_argument(
            '--trialrun', action="store_true", default=False,
            help='Trial run, no updates [default: %(default)s]')
        parser.add_argument(
            '--mugs', nargs="+",
            help='Process MUGs list list mugs by name [default: %(default)s]')
        parser.add_argument(
            "--pro", default=False, action="store_true",
            help="use if you have a pro account uses pro API calls")
        parser.add_argument(
            "--admin", default=False, action="store_true",
            help="Some calls are only available to admin users")
        parser.add_argument(
            "--database", default="MUGS",
            help="Default database name to write to [default: %(default)s]")
        parser.add_argument('--phases', nargs="+",
                            choices=[
                                "groups", "members", "attendees",
                                "upcomingevents", "pastevents"
                            ],
                            default=["all"],
                            help='execution phases')
        parser.add_argument(
            '--loglevel', default="INFO",
            choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"],
            help='Logging level [default: %(default)s]')
        parser.add_argument('--apikey', default=None,
                            help='Default API key for meetup')
        parser.add_argument(
            '--urlfile',
            help="File containing a list of MUG URLs to be used to parse data [ default: %(default)s]")

        # Process arguments
        args = parser.parse_args()

        if args.apikey:
            apikey = args.apikey
        else:
            apikey = get_meetup_key()

        # action="count" leaves verbose as None when the flag is absent;
        # normalise to 0 so the comparison below is valid on Python 3.
        verbose = args.verbose or 0

        format_string = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        logging.basicConfig(format=format_string,
                            level=LoggingLevel(args.loglevel))

        # Turn off logging for requests
        logging.getLogger("requests").setLevel(logging.WARNING)
        logging.getLogger("urllib3").setLevel(logging.WARNING)

        if verbose > 0:
            logging.info("Verbose mode on")

        # Fail early, before a batch is opened, if the URL file is missing.
        if args.urlfile:
            if not os.path.isfile(args.urlfile):
                print("No such file --urlfile '%s'" % args.urlfile)
                sys.exit(1)

        if args.mugs:
            mugList = args.mugs
        else:
            mugList = []

        nopro = not args.pro

        mdb = MUGAlyserMongoDB(args.host)

        audit = Audit(mdb)

        batchID = audit.startBatch(
            {
                "args": vars(args),
                "version": __programName__ + " " + __version__,
                "pro_account": args.pro
            },
            trial=args.trialrun,
            apikey=apikey)

        start = datetime.utcnow()
        logging.info("Started MUG processing for batch ID: %i", batchID)
        logging.info("Writing to database : '%s'", mdb.database().name)

        if nopro:
            logging.info("Using standard API calls (no pro account API key)")
            if args.urlfile:
                # The URL file replaces any --mugs given on the command line.
                logging.info("Reading groups from: '%s'", args.urlfile)
                with open(args.urlfile) as f:
                    mugList = f.read().splitlines()
        else:
            logging.info("Using pro API calls (pro account API key)")

        if nopro:
            logging.info("Processing %i MUG URLS", len(mugList))
        else:
            mugList = list(MeetupAPI().get_pro_group_names())

        writer = MeetupWriter(audit, mdb, mugList, apikey)

        if "all" in args.phases:
            phases = ["groups", "members", "upcomingevents", "pastevents"]
            if args.admin:
                phases.append("attendees")
        else:
            # Copy: phases is mutated below and must not alias args.phases.
            phases = list(args.phases)

        # groups and members are written in bulk; whatever remains in
        # ``phases`` is then handled per MUG by capture_snapshot.
        if "groups" in phases:
            logging.info("processing group info for %i groups: nopro=%s",
                         len(mugList), nopro)
            writer.processGroups(nopro)
            phases.remove("groups")

        if "members" in phases:
            logging.info("processing members info for %i groups: nopro=%s",
                         len(mugList), nopro)
            writer.processMembers(nopro)
            phases.remove("members")

        for i in mugList:
            writer.capture_snapshot(i, args.admin, phases)

        audit.endBatch(batchID)
        end = datetime.utcnow()

        elapsed = end - start

        logging.info("MUG processing took %s for BatchID : %i", elapsed,
                     batchID)

    except KeyboardInterrupt:
        print("Keyboard interrupt : Exiting...")
        sys.exit(2)
    # "except X as e" is valid on Python 2.6+ and Python 3; the old
    # "except X, e" form is a syntax error on Python 3.
    except pymongo.errors.ServerSelectionTimeoutError as e:
        print("Failed to connect to MongoDB Server (server timeout): %s" % e)
        sys.exit(2)
def mugalyser(argv=None):  # IGNORE:C0111
    '''Command line options.

    Parse ``argv``, open an audit batch, work out which groups to collect
    (from the Meetup API, ``--urlfile`` and ``--mugs``), write the selected
    phases for them and close the batch.

    :param argv: argument list handed to ``ArgumentParser.parse_args``;
        ``None`` means the process command line.
    '''
    try:
        # Setup argument parser
        parser = ArgumentParser(description='''
Read data from the Meetup API and write it do a MongoDB database. Each run of this program
creates a new batch of data identified by a batchID. The default database is MUGS. You can change
this by using the --host parameter and specifying a different database in the mongodb URI.
If you use the --pro arguement your API key must be a meetup pro account API key. If not the api
calls to the pro interface will fail.

If you are and adminstrator on the pro account you should use the --admin flag to give you
access to the admin APIs.
''')
        #
        # MongoDB Args
        parser.add_argument('--host',
                            default="mongodb://localhost:27017/MUGS",
                            help='URI to connect to : [default: %(default)s]')
        parser.add_argument("-v", "--version", action='version',
                            version=__programName__ + " " + __version__)
        parser.add_argument(
            '--mugs', nargs="+", default=[],
            help='Process MUGs list list mugs by name [default: %(default)s]')
        parser.add_argument("--collect",
                            choices=["pro", "nopro", "all"],
                            default="all",
                            help="Use pro API calls, no pro API calls or both")
        parser.add_argument(
            "--admin", default=False, action="store_true",
            help="Some calls are only available to admin users, use this if you are not an admin")
        parser.add_argument(
            "--database", default="MUGS",
            help="Default database name to write to [default: %(default)s]")
        parser.add_argument('--phases', nargs="+",
                            choices=[
                                "groups", "members", "attendees",
                                "upcomingevents", "pastevents"
                            ],
                            default=["all"],
                            help='execution phases')
        parser.add_argument(
            '--loglevel', default="INFO",
            choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"],
            help='Logging level [default: %(default)s]')
        parser.add_argument('--apikey', default=None,
                            help='Default API key for meetup')
        parser.add_argument("--batchname", default=__programName__,
                            help="Batch name used in creating audit batches")
        parser.add_argument(
            '--urlfile',
            help="File containing a list of MUG URLs to be used to parse data [ default: %(default)s]")
        parser.add_argument(
            "--drop", default=False, action="store_true",
            help="drop the database before writing data [default: %(default)s]")
        parser.add_argument("--organizer_id", type=int,
                            help="Organizer ID is required for non pro groups")

        # Process arguments
        args = parser.parse_args(argv)

        if args.apikey:
            apikey = args.apikey
        else:
            apikey = get_meetup_key()

        mugalyser_logger = Logger(__programName__, args.loglevel)
        # mugalyser_logger.add_stream_handler( args.loglevel )
        mugalyser_logger.add_file_handler("mugalyser.log", args.loglevel)

        api = MeetupAPI(apikey, reshape=True)
        logger = mugalyser_logger.log()

        # Turn off logging for requests
        logging.getLogger("requests").setLevel(logging.WARNING)
        logging.getLogger("urllib3").setLevel(logging.WARNING)

        mdb = MUGAlyserMongoDB(uri=args.host, database_name=args.database)

        if args.drop:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning(f"Dropping database:'{args.database}'")
            mdb.drop(args.database)

        audit = Audit(mdb)

        batchID = audit.start_batch({
            "args": vars(args),
            "version": __programName__ + " " + __version__,
            "name": args.batchname
        })

        start = datetime.utcnow()
        logger.info("Started MUG processing for batch ID: %i", batchID)
        logger.info("Writing to database : '%s'", mdb.database().name)

        # urlname -> full group document (None when only the name is known).
        group_dict = {}

        count = 0
        if args.mugs:
            group_list = [api.get_group(url) for url in args.mugs]
        else:
            group_list = list(api.get_groups())

        for url, group in group_list:
            urlname = group['urlname']
            # Re-fetch the group by urlname to get its full document.
            url, full_group = api.get_group(urlname)

            if args.collect in ["pro", "all"]:
                if ("pro_network" in full_group
                        and full_group["pro_network"]["name"] == "MongoDB"):
                    count = count + 1
                    logger.info(
                        f"{count}. Processing pro group: {group['urlname']}")
                    group_dict[urlname] = full_group

            if args.collect in ["nopro", "all"]:
                if args.organizer_id:
                    if full_group["organizer"]["id"] == args.organizer_id:
                        count = count + 1
                        logger.info(
                            f"{count}. Processing normal group: {group['urlname']}")
                        group_dict[urlname] = full_group
                else:
                    logger.error(
                        "You must specify --organizer_id when collecting nopro groups")
                    sys.exit(1)

        if args.urlfile:
            urlfile = os.path.abspath(args.urlfile)
            logger.info("Reading groups from: '%s'", urlfile)
            with open(urlfile) as f:
                lines = f.read().splitlines()

            # Skip comment lines (starting with #) and blank lines. A raw
            # string keeps \s a regex escape rather than an invalid string
            # escape.
            skip_line = re.compile(r"^\s*#.*|^\s*$").match
            for i in lines:
                clean_line = i.rstrip()
                if not skip_line(clean_line):
                    # setdefault keeps a full group document already fetched
                    # above instead of overwriting it with None.
                    group_dict.setdefault(clean_line, None)

        # scoop up any command line args
        for i in args.mugs:
            group_dict.setdefault(i, None)

        writer = MeetupWriter(apikey, batchID, mdb, reshape=True)

        if "all" in args.phases:
            phases = ["groups", "members", "upcomingevents", "pastevents"]
        else:
            # Copy: phases is mutated below and must not alias args.phases.
            phases = list(args.phases)

        if args.admin:
            logger.info("--admin : we will collect attendee info")
            # Avoid a duplicate when --phases attendees was given explicitly.
            if "attendees" not in phases:
                phases.append("attendees")
        else:
            logger.info("No admin account")
            logger.info(
                "We will not collect attendee info: ignoring attendees")

        logger.info("Processing phases: %s", phases)

        # groups and members are written in bulk; whatever remains in
        # ``phases`` is then handled per group by capture_snapshot.
        if "groups" in phases:
            logger.info("processing group info for %i groups: collect=%s",
                        len(group_dict), args.collect)
            writer.write_groups(group_dict.keys())
            phases.remove("groups")

        if "members" in phases:
            logger.info("processing members info for %i groups: collect=%s",
                        len(group_dict), args.collect)
            writer.write_members(group_dict.keys())
            phases.remove("members")

        for i in group_dict:
            writer.capture_snapshot(i, args.admin, phases)

        audit.end_batch(batchID)
        end = datetime.utcnow()

        elapsed = end - start

        logger.info("MUG processing took %s for BatchID : %i", elapsed,
                    batchID)

    except KeyboardInterrupt:
        print("Keyboard interrupt : Exiting...")
        sys.exit(2)
    except pymongo.errors.ServerSelectionTimeoutError as e:
        print("Failed to connect to MongoDB Server (server timeout): %s" % e)
        sys.exit(2)