class EdxForumScrubber(object):
    '''
    Given a .bson file of OpenEdX Forum posts, load the file
    into a MongoDB. Then pull a post at a time, anonymize, and
    insert a selection of fields into a MySQL db. The MongoDb
    entries look like this::

        {
            "_id" : ObjectId("51b75a48f359c40a00000028"),
            "_type" : "Comment",
            "abuse_flaggers" : [ ],
            "anonymous" : false,
            "anonymous_to_peers" : false,
            "at_position_list" : [ ],
            "author_id" : "26344",
            "author_username" : "Minelly48",
            "body" : "I am Gwen.I am a nursing professor who took statistics many years ago and want to refresh my knowledge.",
            "comment_thread_id" : ObjectId("51b754e5f359c40a0000001d"),
            "course_id" : "Medicine/HRP258/Statistics_in_Medicine",
            "created_at" : ISODate("2013-06-11T17:11:36.831Z"),
            "endorsed" : false,
            "historical_abuse_flaggers" : [ ],
            "parent_ids" : [ ],
            "updated_at" : ISODate("2013-06-11T17:11:36.831Z"),
            "visible" : true,
            "votes" : {
                "count" : 2,
                "down" : [ ],
                "down_count" : 0,
                "point" : 2,
                "up" : [ "40325", "20323" ],
                "up_count" : 2
            },
            "sk" : "51b75a48f359c40a00000028"
        }

    Depending on parameter allowAnonScreenName in the __init__() method,
    forum entries in the relational database will be associated with the
    same hash that is used to anonymize other parts of the OpenEdX data.
    '''

    LOG_DIR = '/home/dataman/Data/EdX/NonTransformLogs'

    # Pattern for email id - strings of alphabets/numbers/dots/hyphens followed
    # by an @ followed by combinations of letters/digits/dots, followed by
    # edu/com. Written as a raw string so that the regex escapes are not
    # (invalid) Python string escapes; the resulting pattern text is unchanged.
    # NOTE(review): group 4 is '(.)', i.e. ANY single character before
    # 'edu'/'com', not only a literal dot. Left as is because code outside
    # this view may rely on the current group numbering/behavior.
    emailPattern = r'(.*)\s+([a-zA-Z0-9\(\.\-]+)[@]([a-zA-Z0-9\.]+)(.)(edu|com)\s*(.*)'
    #emailPattern='(.*)\\s+([a-zA-Z0-9\\.]+)\\s*(\\(f.*b.*)?(@)\\s*([a-zA-Z0-9\\.\\s;]+)\\s*(\\.)\\s*(edu|com)\\s+(.*)'
    compiledEmailPattern = re.compile(emailPattern)

    # Pattern for replacing embedded double quotes in post bodies.
    # It matches a double quote together with any run of backslashes
    # that immediately precedes it (zero backslashes included, so a
    # double quote at the very start of a string is matched as well).
    # Earlier attempt, kept for reference:
    #doublQuoteReplPattern = re.compile(r'[^\\]{0,1}"')
    doublQuoteReplPattern = re.compile(r'[\\]{0,}"')

    # Schema of EdxForum.contents: an ordered dict that is
    # used twice: the table creation MySQL command is constructed
    # from this dict, and the dict is used to ensure that
    # all its keys (i.e. future column names) are present
    # in each MongoDB object. See also createForumTable().
    # In createForumTable() either entry anon_screen_name,
    # or screen_name in the dict below will be deleted, based
    # on whether we are asked to anonymize or not:
    forumSchema = OrderedDict()
    forumSchema['forum_post_id'] = "varchar(40) NOT NULL DEFAULT 'unavailable'"
    forumSchema['anon_screen_name'] = "varchar(40) NOT NULL DEFAULT 'anon_screen_name_redacted'"  # This or next deleted based on anonymize yes/no
    forumSchema['screen_name'] = "varchar(40) NOT NULL DEFAULT 'anon_screen_name_redacted'"       # This or prev deleted based on anonymize yes/no
    forumSchema['type'] = "varchar(20) NOT NULL"
    forumSchema['anonymous'] = "varchar(10) NOT NULL"
    forumSchema['anonymous_to_peers'] = "varchar(10) NOT NULL"
    forumSchema['at_position_list'] = "varchar(200) NOT NULL"
    forumSchema['forum_uid'] = "varchar(40) NOT NULL"
    forumSchema['body'] = "TEXT NOT NULL"  # "varchar(2500) NOT NULL"
    forumSchema['course_display_name'] = "varchar(100) NOT NULL"
    forumSchema['created_at'] = "datetime NOT NULL"
    forumSchema['votes'] = "TEXT NOT NULL"  # "varchar(200) NOT NULL"
    forumSchema['count'] = "int(11) NOT NULL"
    forumSchema['down_count'] = "int(11) NOT NULL"
    forumSchema['up_count'] = "int(11) NOT NULL"
    forumSchema['up'] = "varchar(200) DEFAULT NULL"
    forumSchema['down'] = "varchar(200) DEFAULT NULL"
    forumSchema['comment_thread_id'] = "varchar(255) DEFAULT NULL"
    forumSchema['parent_id'] = "varchar(255) DEFAULT NULL"
    forumSchema['parent_ids'] = "varchar(255) DEFAULT NULL"
    forumSchema['sk'] = "varchar(255) DEFAULT NULL"
    forumSchema['confusion'] = "varchar(20) NOT NULL DEFAULT ''"
    forumSchema['happiness'] = "varchar(20) NOT NULL DEFAULT ''"

    def __init__(self,
                 bsonFileName,
                 mysqlDbObj=None,
                 forumTableName='contents',
                 allUsersTableName='EdxPrivate.UserGrade',
                 anonymize=True,
                 allowAnonScreenName=False):
        '''
        Given a .bson file containing OpenEdX Forum entries,
        anonymize the entries (if desired), and place them into
        a MySQL table.

        Note that construction already touches the database:
        prepDatabase() is called, which drops and re-creates
        the forum table.

        :param bsonFileName: full path to the .bson file. Set to None if
            instantiating for unit testing.
        :type bsonFileName: String
        :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized
            entries are to be placed. If None, a new such object is created
            into MySQL db 'EdxForum'
        :type mysqlDbObj: MySQLDB
        :param forumTableName: name of table into which anonymized Forum
            entries are to be placed
        :type forumTableName: String
        :param allUsersTableName: fully qualified name of table listing all
            in-the-clear mySQLUser names of users who post to the Forum.
            Used to redact their names from their own posts.
        :type allUsersTableName: String
        :param anonymize: If true, Forum post entries in the MySQL table
            will be anonymized
        :type anonymize: bool
        :param allowAnonScreenName: if True, then occurrences of poster's
            name in post bodies are replaced by <redacName_<anon_screen_name>>,
            where anon_screen_name is the hash used in other tables of the
            OpenEdX data.
        :type allowAnonScreenName: Bool
        '''
        self.bsonFileName = bsonFileName
        self.forumTableName = forumTableName
        self.forumDbName = 'EdxForum'
        self.allUsersTableName = allUsersTableName
        self.anonymize = anonymize
        self.allowAnonScreenName = allowAnonScreenName

        # If not unittest, but regular run, then mysqlDbObj is None
        if mysqlDbObj is None:
            self.mysql_passwd = self.getMySQLPasswd()
            self.mysql_dbhost = 'localhost'
            self.mysql_user = getpass.getuser()  # mySQLUser that started this process
            self.mydb = MySQLDB(user=self.mysql_user,
                                passwd=self.mysql_passwd,
                                db=self.forumDbName)
        else:
            self.mydb = mysqlDbObj

        self.counter = 0

        self.userCache = {}
        self.userSet = set()

        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
        self.setupLogging()
        self.prepDatabase()

    def runConversion(self):
        '''
        Do the actual work. We don't call this method from __init__()
        so that unittests can create an EdxForumScrubber instance without
        doing the actual work. Instead, unittests call individual methods.

        Steps: populate the user cache, load the .bson file into MongoDB
        database 'TmpForum', collection 'contents', transfer every record
        to the MySQL forum table, then close both database connections.
        '''
        self.populateUserCache()

        self.mongo_database_name = 'TmpForum'
        self.collection_name = 'contents'

        # Load bson file into Mongodb:
        self.loadForumIntoMongoDb(self.bsonFileName)
        self.mongodb = MongoDB(dbName=self.mongo_database_name,
                               collection=self.collection_name)

        # Anonymize each forum record, and transfer to MySQL db.
        # Target the table that prepDatabase() created, i.e. the
        # name passed to __init__(), rather than a hard-coded name:
        self.forumMongoToRelational(self.mongodb, self.mydb, self.forumTableName)

        self.mydb.close()
        self.mongodb.close()
        self.logInfo('Entered %d records into %s' %
                     (self.counter, self.forumDbName + '.' + self.forumTableName))

    def loadForumIntoMongoDb(self, bsonFilename):
        '''
        Empty the MongoDB collection self.collection_name in database
        self.mongo_database_name, then fill it from the given .bson
        file by running the external 'mongorestore' program. Finally
        ask the 'mongo' shell for the number of documents now in the
        collection, and store the shell's raw output in self.numMongoItems.

        :param bsonFilename: path to the .bson file to restore
        :type bsonFilename: String
        :raise subprocess.CalledProcessError: if the 'mongo' shell call
            that counts the documents exits with non-zero status.
        '''
        mongoclient = MongoClient()
        db = mongoclient[self.mongo_database_name]

        # Get collection object:
        collection = db[self.collection_name]

        # Clear out any old forum entries:
        self.logInfo('Preparing to delete the collection ')
        collection.remove()
        self.logInfo('Deleting mongo collection completed. Will now attempt a mongo restore')

        self.logInfo('Spawning subprocess to execute mongo restore')
        # NOTE(review): mode 'w' truncates the file at self.logFilePath;
        # confirm that losing its previous content is intended.
        with open(self.logFilePath, 'w') as outfile:
            ret = subprocess.call(
                ['mongorestore',
                 '--drop',
                 '--db', self.mongo_database_name,
                 '--collection', self.collection_name,
                 bsonFilename],
                stdout=outfile, stderr=outfile)

            self.logDebug('Return value from mongorestore is %s' % (ret))
            if ret != 0:
                # Best effort: keep going as before, but make the
                # failure visible at the regular log level.
                self.logInfo('mongorestore exited with non-zero status %s' % ret)

            # Count the documents of the collection that was just
            # restored (not of a hard-coded collection name):
            objCount = subprocess.check_output(
                ['mongo',
                 '--quiet',
                 '--eval',
                 'printjson(db.%s.count())' % self.collection_name,
                 self.mongo_database_name,
                 ],
                stderr=outfile)
            self.numMongoItems = objCount

            self.logInfo('Available Forum posts %s' % objCount)

    def forumMongoToRelational(self, mongodb, mysqlDbObj, mysqlTable):
        '''
        Given a MongoDB wrapper through which Forum posts can be
        queried, and a MySQL db object and table name, process
        each mongo record, and insert it into the MySQL table
        via insert_content_record().

        :param mongodb: object whose query({}) method yields all Forum records
        :type mongodb: MongoDB
        :param mysqlDbObj: wrapper to MySQL db. See pymysql_utils.py
        :type mysqlDbObj: MYSQLDB
        :param mysqlTable: name of table where posts are to be deposited.
            Example: 'contents'.
        :type mysqlTable: String
        '''
        self.logInfo('Will start inserting from mongo collection to MySQL')

        for mongoForumRec in mongodb.query({}):
            mongoRecordObj = MongoRecord(mongoForumRec)

            try:
                # Check whether 'up' can be converted to a list
                list(mongoRecordObj['up'])
            except Exception as e:
                self.logInfo("Error in conversion of 'up' field to a list (setting cell to -1):" + repr(e))
                mongoRecordObj['up'] = '-1'

            # Make sure the MongoDB object has all fields that will
            # be needed for the forum schema:
            self.ensureSchemaAdherence(mongoRecordObj)

            self.insert_content_record(mysqlDbObj, mysqlTable, mongoRecordObj)

    def prepDatabase(self):
        '''
        Prepare the MySQL side: switch the connection's character
        set to utf8, drop the existing forum table, and re-create
        it via createForumTable().

        A ValueError raised while dropping/creating the table is
        logged at debug level and otherwise ignored. A MySQLdb.Error
        anywhere in the method is logged, and the process exits
        with status 1.
        '''
        try:
            self.logDebug("Setting and assigning char set for mysqld. will truncate old values")
            self.mydb.execute('SET NAMES utf8;')
            self.mydb.execute('SET CHARACTER SET utf8;')
            self.mydb.execute('SET character_set_connection=utf8;')

            # Compose fully qualified table name from the db name to
            # which self.mydb is connected, and the forum table name
            # that was established in __init__():
            fullTblName = self.mydb.dbName() + '.' + self.forumTableName
            # Clear old forum data out of the table:
            try:
                self.mydb.dropTable(fullTblName)
                # Create MySQL table for the posts. Depending on
                # self.anonymize the poster name column is either
                # 'anon_screen_name' or 'screen_name' (see forumSchema).
                # NOTE(review): createForumTable() is not visible here;
                # confirm which column goes with which setting.
                self.createForumTable(self.anonymize)
                self.logDebug("setting and assigning char set complete. Truncation succeeded")
            except ValueError as e:
                self.logDebug("Failed either to set character codes, or to create forum table %s: %s" % (fullTblName, repr(e)))

        except MySQLdb.Error as e:
            # Don't let a short args tuple mask the real error
            # with an IndexError raised inside this handler:
            if len(e.args) >= 2:
                self.logInfo("MySql Error exiting %d: %s" % (e.args[0], e.args[1]))
            else:
                self.logInfo("MySql Error exiting: %s" % repr(e))
            sys.exit(1)
class EdxForumScrubber(object):
    '''
    Given a .bson file of OpenEdX Forum posts, load the file
    into a MongoDB. Then pull a post at a time, anonymize, and
    insert a selection of fields into a MySQL db. The MongoDb
    entries look like this::

        {
            "_id" : ObjectId("51b75a48f359c40a00000028"),
            "_type" : "Comment",
            "abuse_flaggers" : [ ],
            "anonymous" : false,
            "anonymous_to_peers" : false,
            "at_position_list" : [ ],
            "author_id" : "26344",
            "author_username" : "Minelly48",
            "body" : "I am Gwen.I am a nursing professor who took statistics many years ago and want to refresh my knowledge.",
            "comment_thread_id" : ObjectId("51b754e5f359c40a0000001d"),
            "course_id" : "Medicine/HRP258/Statistics_in_Medicine",
            "created_at" : ISODate("2013-06-11T17:11:36.831Z"),
            "endorsed" : false,
            "historical_abuse_flaggers" : [ ],
            "parent_ids" : [ ],
            "updated_at" : ISODate("2013-06-11T17:11:36.831Z"),
            "visible" : true,
            "votes" : {
                "count" : 2,
                "down" : [ ],
                "down_count" : 0,
                "point" : 2,
                "up" : [ "40325", "20323" ],
                "up_count" : 2
            },
            "sk" : "51b75a48f359c40a00000028"
        }

    Depending on parameter allowAnonScreenName in the __init__() method,
    forum entries in the relational database will be associated with the
    same hash that is used to anonymize other parts of the OpenEdX data.
    '''

    LOG_DIR = '/home/dataman/Data/EdX/NonTransformLogs'

    # Pattern for email id - strings of alphabets/numbers/dots/hyphens followed
    # by an @ followed by combinations of letters/digits/dots, followed by
    # edu/com. Written as a raw string so that the regex escapes are not
    # (invalid) Python string escapes; the resulting pattern text is unchanged.
    # NOTE(review): group 4 is '(.)', i.e. ANY single character before
    # 'edu'/'com', not only a literal dot. Left as is because code outside
    # this view may rely on the current group numbering/behavior.
    emailPattern = r'(.*)\s+([a-zA-Z0-9\(\.\-]+)[@]([a-zA-Z0-9\.]+)(.)(edu|com)\s*(.*)'
    #emailPattern='(.*)\\s+([a-zA-Z0-9\\.]+)\\s*(\\(f.*b.*)?(@)\\s*([a-zA-Z0-9\\.\\s;]+)\\s*(\\.)\\s*(edu|com)\\s+(.*)'
    compiledEmailPattern = re.compile(emailPattern)

    # Pattern for replacing embedded double quotes in post bodies.
    # It matches a double quote together with any run of backslashes
    # that immediately precedes it (zero backslashes included, so a
    # double quote at the very start of a string is matched as well).
    # Earlier attempt, kept for reference:
    #doublQuoteReplPattern = re.compile(r'[^\\]{0,1}"')
    doublQuoteReplPattern = re.compile(r'[\\]{0,}"')

    def __init__(self,
                 bsonFileName,
                 mysqlDbObj=None,
                 forumTableName='contents',
                 allUsersTableName='EdxPrivate.UserGrade',
                 anonymize=True,
                 allowAnonScreenName=False):
        '''
        Given a .bson file containing OpenEdX Forum entries,
        anonymize the entries (if desired), and place them into
        a MySQL table.

        Note that construction already touches the database:
        prepDatabase() is called, which drops and re-creates
        the forum table.

        :param bsonFileName: full path to the .bson file. Set to None if
            instantiating for unit testing.
        :type bsonFileName: String
        :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized
            entries are to be placed. If None, a new such object is created
            into MySQL db 'EdxForum'
        :type mysqlDbObj: MySQLDB
        :param forumTableName: name of table into which anonymized Forum
            entries are to be placed
        :type forumTableName: String
        :param allUsersTableName: fully qualified name of table listing all
            in-the-clear mySQLUser names of users who post to the Forum.
            Used to redact their names from their own posts.
        :type allUsersTableName: String
        :param anonymize: If true, Forum post entries in the MySQL table
            will be anonymized
        :type anonymize: bool
        :param allowAnonScreenName: if True, then occurrences of poster's
            name in post bodies are replaced by <redacName_<anon_screen_name>>,
            where anon_screen_name is the hash used in other tables of the
            OpenEdX data.
        :type allowAnonScreenName: Bool
        '''
        self.bsonFileName = bsonFileName
        self.forumTableName = forumTableName
        self.forumDbName = 'EdxForum'
        self.allUsersTableName = allUsersTableName
        self.anonymize = anonymize
        self.allowAnonScreenName = allowAnonScreenName

        # If not unittest, but regular run, then mysqlDbObj is None
        if mysqlDbObj is None:
            self.mysql_passwd = self.getMySQLPasswd()
            self.mysql_dbhost = 'localhost'
            self.mysql_user = getpass.getuser()  # mySQLUser that started this process
            # The MySQLDB constructor's keyword for the account name is
            # 'user' (see pymysql_utils); 'mySQLUser' is not accepted:
            self.mydb = MySQLDB(user=self.mysql_user,
                                passwd=self.mysql_passwd,
                                db=self.forumDbName)
        else:
            self.mydb = mysqlDbObj

        self.counter = 0

        self.userCache = {}
        self.userSet = set()

        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
        self.setupLogging()
        self.prepDatabase()

    def runConversion(self):
        '''
        Do the actual work. We don't call this method from __init__()
        so that unittests can create an EdxForumScrubber instance without
        doing the actual work. Instead, unittests call individual methods.

        Steps: populate the user cache, load the .bson file into MongoDB
        database 'TmpForum', collection 'contents', transfer every record
        to the MySQL forum table, then close both database connections.
        '''
        self.populateUserCache()

        self.mongo_database_name = 'TmpForum'
        self.collection_name = 'contents'

        # Load bson file into Mongodb:
        self.loadForumIntoMongoDb(self.bsonFileName)
        self.mongodb = MongoDB(dbName=self.mongo_database_name,
                               collection=self.collection_name)

        # Anonymize each forum record, and transfer to MySQL db.
        # Target the table that prepDatabase() created, i.e. the
        # name passed to __init__(), rather than a hard-coded name:
        self.forumMongoToRelational(self.mongodb, self.mydb, self.forumTableName)

        self.mydb.close()
        self.mongodb.close()
        # Separate db and table name with a dot to log a proper
        # fully qualified table name:
        self.logInfo('Entered %d records into %s' %
                     (self.counter, self.forumDbName + '.' + self.forumTableName))

    def loadForumIntoMongoDb(self, bsonFilename):
        '''
        Empty the MongoDB collection self.collection_name in database
        self.mongo_database_name, then fill it from the given .bson
        file by running the external 'mongorestore' program. Finally
        ask the 'mongo' shell for the number of documents now in the
        collection, and store the shell's raw output in self.numMongoItems.

        :param bsonFilename: path to the .bson file to restore
        :type bsonFilename: String
        :raise subprocess.CalledProcessError: if the 'mongo' shell call
            that counts the documents exits with non-zero status.
        '''
        mongoclient = MongoClient()
        db = mongoclient[self.mongo_database_name]

        # Get collection object:
        collection = db[self.collection_name]

        # Clear out any old forum entries:
        self.logInfo('Preparing to delete the collection ')
        collection.remove()
        self.logInfo('Deleting mongo collection completed. Will now attempt a mongo restore')

        self.logInfo('Spawning subprocess to execute mongo restore')
        # NOTE(review): mode 'w' truncates the file at self.logFilePath;
        # confirm that losing its previous content is intended.
        with open(self.logFilePath, 'w') as outfile:
            ret = subprocess.call(
                ['mongorestore',
                 '--drop',
                 '--db', self.mongo_database_name,
                 '--collection', self.collection_name,
                 bsonFilename],
                stdout=outfile, stderr=outfile)

            self.logDebug('Return value from mongorestore is %s' % (ret))
            if ret != 0:
                # Best effort: keep going as before, but make the
                # failure visible at the regular log level.
                self.logInfo('mongorestore exited with non-zero status %s' % ret)

            # Count the documents of the collection that was just
            # restored (not of a hard-coded collection name):
            objCount = subprocess.check_output(
                ['mongo',
                 '--quiet',
                 '--eval',
                 'printjson(db.%s.count())' % self.collection_name,
                 self.mongo_database_name,
                 ],
                stderr=outfile)
            self.numMongoItems = objCount

            self.logInfo('Available Forum posts %s' % objCount)

    def forumMongoToRelational(self, mongodb, mysqlDbObj, mysqlTable):
        '''
        Given a MongoDB wrapper through which Forum posts can be
        queried, and a MySQL db object and table name, process
        each mongo record, and insert it into the MySQL table
        via insert_content_record().

        :param mongodb: object whose query({}) method yields all Forum records
        :type mongodb: MongoDB
        :param mysqlDbObj: wrapper to MySQL db. See pymysql_utils.py
        :type mysqlDbObj: MYSQLDB
        :param mysqlTable: name of table where posts are to be deposited.
            Example: 'contents'.
        :type mysqlTable: String
        '''
        self.logInfo('Will start inserting from mongo collection to MySQL')

        for mongoForumRec in mongodb.query({}):
            mongoRecordObj = MongoRecord(mongoForumRec)

            try:
                # Check whether 'up' can be converted to a list
                list(mongoRecordObj['up'])
            except Exception as e:
                self.logInfo('Error in conversion' + repr(e))
                mongoRecordObj['up'] = '-1'

            self.insert_content_record(mysqlDbObj, mysqlTable, mongoRecordObj)

    def prepDatabase(self):
        '''
        Prepare the MySQL side: switch the connection's character
        set to utf8, drop the existing forum table, and re-create
        it via createForumTable().

        A ValueError raised while dropping/creating the table is
        logged at debug level and otherwise ignored. A MySQLdb.Error
        anywhere in the method is logged, and the process exits
        with status 1.
        '''
        try:
            self.logDebug("Setting and assigning char set for mysqld. will truncate old values")
            self.mydb.execute('SET NAMES utf8;')
            self.mydb.execute('SET CHARACTER SET utf8;')
            self.mydb.execute('SET character_set_connection=utf8;')

            # Compose fully qualified table name from the db name to
            # which self.mydb is connected, and the forum table name
            # that was established in __init__():
            fullTblName = self.mydb.dbName() + '.' + self.forumTableName
            # Clear old forum data out of the table:
            try:
                self.mydb.dropTable(fullTblName)
                # Create MySQL table for the posts. The poster name
                # column is 'screen_name' or 'anon_screen_name',
                # depending on self.anonymize.
                # NOTE(review): createForumTable() is not visible here;
                # confirm which column goes with which setting.
                self.createForumTable(self.anonymize)
                self.logDebug("setting and assigning char set complete. Truncation succeeded")
            except ValueError as e:
                self.logDebug("Failed either to set character codes, or to create forum table %s: %s" % (fullTblName, repr(e)))

        except MySQLdb.Error as e:
            # Don't let a short args tuple mask the real error
            # with an IndexError raised inside this handler:
            if len(e.args) >= 2:
                self.logInfo("MySql Error exiting %d: %s" % (e.args[0], e.args[1]))
            else:
                self.logInfo("MySql Error exiting: %s" % repr(e))
            sys.exit(1)
class MongoTest(unittest.TestCase):
    '''
    Unit tests for the MongoDB wrapper of the mongodb.py module.
    Per the original author, a library that fakes a MongoDB
    server is used. See https://pypi.python.org/pypi/mongomock/1.0.1
    '''

    def setUp(self):
        '''
        Create three sample documents, connect to database and
        collection 'unittest', and empty both collections that
        the tests use.
        '''
        corelli = {"fname": "Franco", "lname": "Corelli"}
        daVinci = {"fname": "Leonardo", "lname": "DaVinci", "age": 300}
        gandolpho = {"fname": "Franco", "lname": "Gandolpho"}
        self.objs = [corelli, daVinci, gandolpho]

        self.mongodb = MongoDB(dbName='unittest', collection='unittest')
        for collName in ("unittest", "new_coll"):
            self.mongodb.clearCollection(collection=collName)
        self.mongodb.setCollection("unittest")

    def tearDown(self):
        '''
        Drop both test collections, then close the connection.
        '''
        for collName in ('unittest', 'new_coll'):
            self.mongodb.dropCollection(collection=collName)
        self.mongodb.close()

    def _assertFieldIs(self, expected, doc, fieldName):
        '''
        Assert that doc[fieldName] equals expected, using the
        failure message shared by the single-object retrieval checks.
        '''
        self.assertEqual(
            expected,
            doc[fieldName],
            "Failed retrieval of single obj; expected '%s' but got '%s'" % (expected, doc[fieldName]))

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_update_and_find_one(self):
        '''
        Insert one document; retrieve it through a query that
        names the collection explicitly.
        '''
        self.mongodb.insert(self.objs[0])
        # query() hands back an iterator over the results:
        francos = self.mongodb.query({"fname": "Franco"}, limit=1, collection="unittest")
        firstDoc = next(francos)
        self._assertFieldIs('Corelli', firstDoc, 'lname')

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_set_coll_use_different_coll(self):
        '''
        Switch the default collection, and check that queries
        without a collection argument use the new default, while
        an explicit collection argument overrides it for that
        one query only.
        '''
        francoQuery = {"fname": "Franco"}
        anyRecommendation = {"recommendation": {'$regex': '.*'}}

        # One document goes into 'unittest'...
        self.mongodb.insert(self.objs[0])

        # ...and, after the switch, one into 'new_coll':
        self.mongodb.setCollection('new_coll')
        self.mongodb.insert({"recommendation": "Hawaii"})

        # There is no Franco in new_coll, so the test
        # expects no result count:
        self.mongodb.query(francoQuery, limit=1)
        resCount = self.mongodb.resultCount(francoQuery)
        self.assertIsNone(
            resCount,
            "Got non-null result that should be null: %s" % resCount)

        # A search for what was put into new_coll should succeed:
        hawaiiDoc = next(self.mongodb.query(anyRecommendation, limit=1))
        self._assertFieldIs('Hawaii', hawaiiDoc, 'recommendation')

        # Collection switch just for the duration of one query:
        corelliDoc = next(self.mongodb.query(francoQuery, limit=1, collection="unittest"))
        self._assertFieldIs('Corelli', corelliDoc, 'lname')

        # The default collection should still be new_coll, so
        # a search without collection spec should look there:
        hawaiiDoc = next(self.mongodb.query(anyRecommendation, limit=1))
        self._assertFieldIs('Hawaii', hawaiiDoc, 'recommendation')

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_multi_result(self):
        '''
        Two inserted documents with the same fname must
        both be counted.
        '''
        francoQuery = {"fname": "Franco"}
        # Both of these documents have fname == Franco:
        for docIndex in (0, 2):
            self.mongodb.insert(self.objs[docIndex])

        francos = self.mongodb.query(francoQuery)
        # At least one result must be pulled before asking for the count:
        next(francos)
        resCount = self.mongodb.resultCount(francoQuery)
        if resCount == 2:
            return
        self.fail("Added two Franco objects, but only %s are found." % str(resCount))

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_clear_collection(self):
        '''
        A document is found after insertion, and is no longer
        found once the collection was cleared.
        '''
        fooQuery = {"foo": 10}
        self.mongodb.insert({"foo": 10})
        foundDoc = next(self.mongodb.query(fooQuery, limit=1))
        self.assertIsNotNone(foundDoc, "Did not find document that was just inserted.")

        self.mongodb.clearCollection()
        emptyIter = self.mongodb.query(fooQuery, limit=1)
        self.assertRaises(StopIteration, next, emptyIter)

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_only_some_return_columns(self):
        '''
        Query all documents, asking only for the lname column.
        (Original author's note: also tests the suppression of
        the _id col when desired.)
        '''
        for docIndex in (0, 1):
            self.mongodb.insert(self.objs[docIndex])

        # NOTE(review): the second argument is the plain string
        # "lname" (the original's parentheses did not make it a
        # tuple); confirm that query() accepts a bare string.
        lnameDocs = self.mongodb.query({}, "lname")
        names = []
        for lnameDoc in lnameDocs:
            self.assertEqual(2, self.mongodb.resultCount({}))
            names.append(lnameDoc['lname'])
        self.assertItemsEqual(['Corelli', 'DaVinci'], names,
                              "Did not receive expected lnames: %s" % str(names))
class MongoTest(unittest.TestCase):
    '''
    Exercises the MongoDB class of the mongodb.py module.
    According to the original author, a library that fakes a
    MongoDB server is used. See https://pypi.python.org/pypi/mongomock/1.0.1
    '''

    # Failure message template used by all single-document checks:
    SINGLE_OBJ_MSG = "Failed retrieval of single obj; expected '%s' but got '%s'"

    def setUp(self):
        '''
        Prepare sample documents, open the connection on
        'unittest'/'unittest', and start with empty collections.
        '''
        self.objs = [
            dict(fname="Franco", lname="Corelli"),
            dict(fname="Leonardo", lname="DaVinci", age=300),
            dict(fname="Franco", lname="Gandolpho"),
        ]
        mongodb = MongoDB(dbName='unittest', collection='unittest')
        self.mongodb = mongodb
        mongodb.clearCollection(collection="unittest")
        mongodb.clearCollection(collection="new_coll")
        mongodb.setCollection("unittest")

    def tearDown(self):
        '''
        Remove the collections the tests worked with, and disconnect.
        '''
        mongodb = self.mongodb
        mongodb.dropCollection(collection='unittest')
        mongodb.dropCollection(collection='new_coll')
        mongodb.close()

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_update_and_find_one(self):
        '''
        A single inserted document can be retrieved again.
        '''
        self.mongodb.insert(self.objs[0])
        # The query result is an iterator; pull its first element:
        res = next(self.mongodb.query({"fname": "Franco"},
                                      limit=1,
                                      collection="unittest"))
        lname = res['lname']
        self.assertEqual('Corelli', lname, self.SINGLE_OBJ_MSG % ('Corelli', lname))

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_set_coll_use_different_coll(self):
        '''
        Queries go to the collection set via setCollection(),
        unless a collection is named in the query call itself.
        '''
        mongodb = self.mongodb

        # First document lands in unittest:
        mongodb.insert(self.objs[0])

        # Make new_coll the default, and add a document there:
        mongodb.setCollection('new_coll')
        mongodb.insert({"recommendation": "Hawaii"})

        # Default is new_coll now; the test expects
        # no count for a Franco search:
        mongodb.query({"fname": "Franco"}, limit=1)
        resCount = mongodb.resultCount({"fname": "Franco"})
        self.assertIsNone(resCount,
                          "Got non-null result that should be null: %s" % resCount)

        # Searching within new_coll should succeed, though:
        res = next(mongodb.query({"recommendation": {'$regex': '.*'}}, limit=1))
        recommendation = res['recommendation']
        self.assertEqual('Hawaii', recommendation,
                         self.SINGLE_OBJ_MSG % ('Hawaii', recommendation))

        # Name the other collection inline:
        res = next(mongodb.query({"fname": "Franco"}, limit=1, collection="unittest"))
        lname = res['lname']
        self.assertEqual('Corelli', lname, self.SINGLE_OBJ_MSG % ('Corelli', lname))

        # new_coll should have remained the default, so an
        # unqualified search should again look in new_coll:
        res = next(mongodb.query({"recommendation": {'$regex': '.*'}}, limit=1))
        recommendation = res['recommendation']
        self.assertEqual('Hawaii', recommendation,
                         self.SINGLE_OBJ_MSG % ('Hawaii', recommendation))

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_multi_result(self):
        '''
        The result count reflects all matching documents.
        '''
        mongodb = self.mongodb
        # objs[0] and objs[2] both have fname == Franco:
        mongodb.insert(self.objs[0])
        mongodb.insert(self.objs[2])

        resIter = mongodb.query({"fname": "Franco"})
        # The count is requested only after one result was retrieved:
        next(resIter)
        numFound = mongodb.resultCount({"fname": "Franco"})
        if not numFound == 2:
            self.fail("Added two Franco objects, but only %s are found." % str(numFound))

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_clear_collection(self):
        '''
        clearCollection() removes previously inserted documents.
        '''
        mongodb = self.mongodb
        mongodb.insert({"foo": 10})
        res = next(mongodb.query({"foo": 10}, limit=1))
        self.assertIsNotNone(res, "Did not find document that was just inserted.")

        mongodb.clearCollection()
        # The iterator must be exhausted right away:
        self.assertRaises(StopIteration, next, mongodb.query({"foo": 10}, limit=1))

    @unittest.skipIf(not TEST_ALL, "Skipping")
    def test_only_some_return_columns(self):
        '''
        Retrieve just the lname column of all documents.
        (Original author's note: also tests the suppression of
        the _id col when desired.)
        '''
        mongodb = self.mongodb
        mongodb.insert(self.objs[0])
        mongodb.insert(self.objs[1])

        # NOTE(review): "lname" is passed as a plain string; the
        # original's ("lname") was not a tuple either. Confirm
        # that query() accepts a bare string here.
        columnSpec = "lname"
        names = []
        for lnameDict in mongodb.query({}, columnSpec):
            resCount = mongodb.resultCount({})
            self.assertEqual(2, resCount)
            names.append(lnameDict['lname'])
        self.assertItemsEqual(['Corelli', 'DaVinci'], names,
                              "Did not receive expected lnames: %s" % str(names))