def tearDown(self):
    """Open and immediately close a connection to the 'unittest' database.

    The test tables are deliberately left in place; see the note below.
    """
    connection = MySQLDB(user='******', passwd='', db='unittest')
    self.db = connection
    # Can't drop tables: hangs
    #self.db.dropTable('StudentmoduleExcerpt')
    #self.db.dropTable('ActivityGrade')
    connection.close()
def testWithMySQLPassword(self):
    """Check login, querying and bulk insert while user 'unittest' has a password.

    The password is always removed again in the ``finally`` clause so
    that the other tests, which log in without one, keep working.
    """
    # The SET PASSWORD syntax differs between MySQL < 5.7 and >= 5.7:
    if self.mysql_ge_5_7:
        set_pwd_cmd = "SET PASSWORD FOR unittest@localhost = 'foobar'"
        clear_pwd_cmd = "SET PASSWORD FOR unittest@localhost = ''"
    else:
        set_pwd_cmd = "SET PASSWORD FOR unittest@localhost = PASSWORD('foobar')"
        clear_pwd_cmd = "SET PASSWORD FOR unittest@localhost = PASSWORD('')"

    try:
        # Set a password for the unittest user:
        self.mysqldb.execute(set_pwd_cmd)
        self.mysqldb.close()

        # We should be unable to log in without a pwd:
        with self.assertRaises(ValueError):
            self.mysqldb = MySQLDB(host='localhost', user='******', db='unittest')

        # Open new MySQLDB instance, supplying pwd:
        self.mysqldb = MySQLDB(host='localhost',
                               user='******',
                               passwd='foobar',
                               db='unittest')

        # Do a test query:
        self.buildSmallDb()
        row_iter = self.mysqldb.query("SELECT col2 FROM unittest WHERE col1 = 10;")
        self.assertEqual(row_iter.next(), 'col1')

        # Bulk insert is also different for pwd vs. none:
        self.testBulkInsert()
    finally:
        # Make sure to remove the pwd from user unittest,
        # so that other tests will run successfully:
        self.mysqldb.execute(clear_pwd_cmd)
def testQueryIterator(self):
    """Iterate over query results with the default cursor and the dict cursor."""
    self.buildSmallDb()

    # Expected tuples for the first three rows, keyed by row number:
    expected_tuples = {0: (10, 'col1'),
                       1: (20, 'col2'),
                       2: (30, 'col3')}
    for rowNum, result in enumerate(self.mysqldb.query('SELECT col1,col2 FROM unittest')):
        if rowNum in expected_tuples:
            self.assertEqual(expected_tuples[rowNum], result)

    # Test the dict cursor
    self.mysqldb.close()
    self.mysqldb = MySQLDB(host='localhost',
                           user='******',
                           db='unittest',
                           cursor_class=Cursors.DICT)

    # Expected col2 value for each known col1 value:
    expected_col2 = {10: 'col1', 20: 'col2', 30: 'col3'}
    for result in self.mysqldb.query('SELECT col1,col2 FROM unittest'):
        self.assertIsInstance(result, dict)
        col1_value = result['col1']
        if col1_value in expected_col2:
            self.assertEqual(result['col2'], expected_col2[col1_value])
def setUp(self):
    """Create and fill the tables each test starts from.

    Builds ``self.allColNames`` (comma-separated column list used in
    SELECT statements), recreates and fills StudentmoduleExcerpt,
    ensures an empty ActivityGrade table exists, and recreates and
    fills a rudimentary UserGrade table. The connection is closed
    again before returning.
    """
    testCls = TestAddAnonToActivityGrade
    self.allColNames = ','.join(testCls.studentmoduleExcerptColNames)

    self.db = MySQLDB(user='******', passwd='', db='unittest')
    self.db.dropTable('StudentmoduleExcerpt')
    self.db.createTable('StudentmoduleExcerpt',
                        testCls.studentmoduleExcerptSchema,
                        temporary=False)
    #***temporary=True)
    self.db.bulkInsert('StudentmoduleExcerpt',
                       testCls.studentmoduleExcerptColNames,
                       testCls.studentmoduleExcerptValues)

    self.db.createTable('ActivityGrade', testCls.studentmoduleExcerptSchema)
    # Make sure there isn't left over content (if the table existed):
    self.db.truncateTable('ActivityGrade')

    # Rudimentary UserGrade table:
    self.db.dropTable('UserGrade')
    self.db.createTable('UserGrade',
                        testCls.userGradeExcerptSchema,
                        temporary=False)
    self.db.bulkInsert('UserGrade',
                       testCls.userGradeExcerptColNames,
                       testCls.userGradeExcerptValues)
    self.db.close()
def __init__(self, extIdsFileName):
    """Connect to MySQL db 'Misc' and run the processing steps.

    The steps are: create the temporary ext-ids table, load the ids
    from ``extIdsFileName``, find screen names, and compute the
    anonymizations from them.

    :param extIdsFileName: file handed to ``self.loadExtIds()``
    """
    user = '******'
    pwd = None

    # Try to find pwd in specified user's $HOME/.ssh/mysql_root
    currUserHomeDir = os.getenv('HOME')
    if currUserHomeDir is not None:
        pwdFile = os.path.join(currUserHomeDir, '.ssh/mysql_root')
        try:
            # Need to access MySQL db as its 'root':
            with open(pwdFile) as fd:
                pwd = fd.readline().strip()
            # Switch user to 'root' b/c from now on it will need to be root:
            user = '******'
        except IOError:
            # No .ssh subdir of user's home, or no mysql_root inside .ssh:
            pwd = None

    self.db = MySQLDB(user=user, passwd=pwd, db='Misc')

    self.makeTmpExtsTable()
    self.loadExtIds(extIdsFileName)

    # Need to close this file, and thereby delete it,
    # so that MySQL is willing to write to it. Yes,
    # that's a race condition. But this is an
    # admin script, run by one person:
    outfile = tempfile.NamedTemporaryFile(prefix='extsIntsScreenNames',
                                          suffix='.csv',
                                          delete=True)
    outfile.close()
    outfileName = outfile.name

    self.findScreenNames(outfileName)
    self.computeAnonFromScreenNames(outfileName)
def __init__(self, logFile, uid, pwd, tsvFileName, screenNamePos):
    '''
    Store the run parameters and connect to MySQL database 'EdxPrivate'.

    @param logFile: file where log entries will be appended.
    @type logFile: String
    @param uid: MySQL user under which to log in. Assumed to be other than None
    @type uid: String
    @param pwd: MySQL password for user uid. May be None, in which
        case no password is passed to the MySQL wrapper.
    @type pwd: {String | None}
    @param tsvFileName: name of TSV file where rows of edxprod's
        certificates_generatedcertificate table are located.
        It is assumed that the caller verified existence and
        readability of this file.
    @type tsvFileName: String
    @param screenNamePos: Zero-origin position of the screen name column
        in the TSV file from certificates_generatedcertificate
    @type screenNamePos: int
    '''
    self.uid = uid
    self.pwd = pwd
    self.tsvFileName = tsvFileName
    self.screenNamePos = screenNamePos
    self.logFile = logFile

    # Only pass a password if one was given:
    connectArgs = {'user': uid, 'db': 'EdxPrivate'}
    if pwd is not None:
        connectArgs['passwd'] = pwd
    self.mysqldb = MySQLDB(**connectArgs)
def __init__(self,
             bsonFileName,
             mysqlDbObj=None,
             forumTableName='contents',
             allUsersTableName='EdxPrivate.UserGrade',
             anonymize=True,
             allowAnonScreenName=False):
    '''
    Given a .bson file containing OpenEdX Forum entries,
    anonymize the entries (if desired), and place them into a MySQL table.

    :param bsonFileName: full path to the .bson table. Set to None if instantiating
        for unit testing.
    :type bsonFileName: String
    :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized entries are
        to be placed. If None, a new such object is created into MySQL db 'EdxForum'
    :type mysqlDbObj: MySQLDB
    :param forumTableName: name of table into which anonymized Forum entries are to be placed
    :type forumTableName: String
    :param allUsersTableName: fully qualified name of table listing all in-the-clear
        names of users who post to the Forum. Used to redact their names from their
        own posts.
    :type allUsersTableName: String
    :param anonymize: If true, Forum post entries in the MySQL table will be anonymized
    :type anonymize: bool
    :param allowAnonScreenName: if True, then occurrences of poster's name in
        post bodies are replaced by <redacName_<anon_screen_name>>, where anon_screen_name
        is the hash used in other tables of the OpenEdX data.
    :type allowAnonScreenName: bool
    '''
    self.bsonFileName = bsonFileName
    self.forumTableName = forumTableName
    self.forumDbName = 'EdxForum'
    self.allUsersTableName = allUsersTableName
    self.anonymize = anonymize
    self.allowAnonScreenName = allowAnonScreenName

    # If not unittest, but regular run, then mysqlDbObj is None
    if mysqlDbObj is None:
        self.mysql_passwd = self.getMySQLPasswd()
        self.mysql_dbhost = 'localhost'
        self.mysql_user = getpass.getuser()  # user that started this process
        # The MySQLDB keyword for the login name is 'user', as in every
        # other MySQLDB call; 'mySQLUser' was a mistaken rename.
        self.mydb = MySQLDB(user=self.mysql_user,
                            passwd=self.mysql_passwd,
                            db=self.forumDbName)
    else:
        self.mydb = mysqlDbObj

    self.counter = 0

    self.userCache = {}
    self.userSet = set()

    warnings.filterwarnings('ignore', category=MySQLdb.Warning)
    self.setupLogging()
    self.prepDatabase()
def setUp(self):
    """Open a fresh connection and record whether MySQL is version 5.7 or later.

    Raises RuntimeError with the message prepared by the class-level
    setup if the test environment was found to be unusable.
    """
    if not TestPymysqlUtils.env_ok:
        raise RuntimeError(TestPymysqlUtils.err_msg)
    try:
        self.mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
    except ValueError as e:
        self.fail(str(e) + " (For unit testing, localhost MySQL server must have user 'unittest' without password, and a database called 'unittest')")

    # Make MySQL version more convenient to check:
    major = TestPymysqlUtils.major
    self.mysql_ge_5_7 = (major == 5 and TestPymysqlUtils.minor >= 7) or major >= 8
def __init__(self, user, pwd):
    """Connect to MySQL db 'Edx' and (re)create the destination table.

    Any existing ``UserCountryTableCreator.DEST_TABLE`` is dropped
    and a fresh, empty one is created.

    :param user: MySQL user under which to log in
    :param pwd: MySQL password for ``user``
    """
    self.ipCountryXlater = IpCountryDict()
    self.user = user
    self.pwd = pwd
    self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
    self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
    # Build the schema from a list of pairs: an OrderedDict created
    # from a dict literal only keeps the column order on interpreters
    # whose plain dicts are already ordered.
    schema = OrderedDict([
        ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
        ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
        ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
        ('country', 'varchar(255) NOT NULL DEFAULT ""'),
    ])
    self.db.createTable(UserCountryTableCreator.DEST_TABLE, schema)
def setUpClass(cls):
    """Read the test configuration and determine the database to test in.

    Sets ``cls.utils``, ``cls.test_host``, ``cls.user``,
    ``cls.mysql_pwd`` and ``cls.db_name``. The connection used to look
    up the unittest database is closed before returning, also when the
    lookup fails.
    """
    super(AuxTableCopyTester, cls).setUpClass()

    # Read config file to see which MySQL server test_host we should
    # run the tests on. If setup.py does not exist, copy
    # setupSample.py to setup.py:
    config_info = ConfigInfo()
    cls.utils = Utilities()

    test_host = config_info.test_default_host
    user = config_info.test_default_user

    cls.test_host = test_host
    cls.user = user

    mysql_pwd = cls.utils.get_db_pwd(test_host, unittests=True)
    cls.mysql_pwd = mysql_pwd

    db = AuxTableCopyTester.db = MySQLDB(user=user,
                                         passwd=mysql_pwd,
                                         db='information_schema',
                                         host=test_host)
    try:
        # On localhost we expect a db 'Unittest'. On any other host,
        # ensure there is a unittest db for us to work in.
        # We'll delete it later:
        if test_host == 'localhost':
            cls.db_name = 'Unittest'
            cls.mysql_pwd = ''
        else:
            cls.db_name = UnittestDbFinder(db).db_name
    finally:
        # Don't leak the connection if the db lookup raises:
        db.close()
def setUp(self):
    """Create and fill the tables each test starts from.

    Builds ``self.allColNames`` (comma-separated column list used in
    SELECT statements), recreates and fills StudentmoduleExcerpt,
    ensures an empty ActivityGrade table exists, and recreates and
    fills a rudimentary UserGrade table. The connection is closed
    again before returning.
    """
    self.allColNames = ','.join(TestAddAnonToActivityGrade.studentmoduleExcerptColNames)

    self.db = MySQLDB(user='******', passwd='', db='unittest')
    self.db.dropTable('StudentmoduleExcerpt')
    self.db.createTable('StudentmoduleExcerpt',
                        TestAddAnonToActivityGrade.studentmoduleExcerptSchema,
                        temporary=False)
    #***temporary=True)
    self.db.bulkInsert('StudentmoduleExcerpt',
                       TestAddAnonToActivityGrade.studentmoduleExcerptColNames,
                       TestAddAnonToActivityGrade.studentmoduleExcerptValues)

    self.db.createTable('ActivityGrade', TestAddAnonToActivityGrade.studentmoduleExcerptSchema)
    # Make sure there isn't left over content (if the table existed):
    self.db.truncateTable('ActivityGrade')

    # Rudimentary UserGrade table:
    self.db.dropTable('UserGrade')
    self.db.createTable('UserGrade',
                        TestAddAnonToActivityGrade.userGradeExcerptSchema,
                        temporary=False)
    self.db.bulkInsert('UserGrade',
                       TestAddAnonToActivityGrade.userGradeExcerptColNames,
                       TestAddAnonToActivityGrade.userGradeExcerptValues)
    self.db.close()
def __init__(self, user, pwd):
    """Connect to MySQL db 'Edx' and request creation of the destination table.

    :param user: MySQL user under which to log in
    :param pwd: MySQL password for ``user``
    """
    self.ipCountryXlater = IpCountryDict()
    self.user = user
    self.pwd = pwd
    self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
    # Make sure table exists. It should, and it should be filled
    # with all anon_screen_name and countries up the previous
    # load.
    # The schema is built from a list of pairs: an OrderedDict created
    # from a dict literal only keeps the column order on interpreters
    # whose plain dicts are already ordered.
    schema = OrderedDict([
        ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
        ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
        ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
        ('country', 'varchar(255) NOT NULL DEFAULT ""'),
    ])
    self.db.createTable(UserCountryTableCreator.DEST_TABLE, schema)
def ensureOpenMySQLDb(self):
    """Open a MySQL connection for the current user and return it.

    First tries to log in with the password found on the first line of
    /home/<currUser>/.ssh/mysql. If anything in that attempt fails, a
    login without password is tried. If that fails too, the error is
    remembered in ``self.dbError`` and ``self.mysqlDb`` is set to None.

    :return: the MySQLDB instance, or None if no connection could be made
    """
    try:
        with open('/home/%s/.ssh/mysql' % self.currUser, 'r') as fd:
            self.mySQLPwd = fd.readline().strip()
            self.mysqlDb = MySQLDB(user=self.currUser,
                                   passwd=self.mySQLPwd,
                                   db=self.mainThread.defaultDb)
    except Exception:
        try:
            # Try w/o a pwd:
            self.mySQLPwd = None
            # NOTE(review): this attempt reads self.defaultDb, while the
            # attempt above reads self.mainThread.defaultDb -- confirm
            # which one is intended.
            self.mysqlDb = MySQLDB(user=self.currUser, db=self.defaultDb)
        except Exception as e:
            # Remember the error msg for later. repr() replaces the
            # backtick syntax, which Python 3 no longer accepts:
            self.dbError = repr(e)
            self.mysqlDb = None
    return self.mysqlDb
def __init__(self, majors_table='sankey'):
    '''
    Connect to MySQL as the user running this process, obtain the
    nodes and links, and plot them as a Sankey diagram.

    :param majors_table: stored in self.majors_table and passed to
        MySQLDB as the ``db`` argument
    :type majors_table: String
    '''
    self.majors_table = majors_table

    self.mysql_passwd = self.getMySQLPasswd()
    self.mysql_dbhost = 'localhost'
    # User that started this process:
    self.mysql_user = getpass.getuser()

    connectArgs = {'user': self.mysql_user,
                   'passwd': self.mysql_passwd,
                   'db': self.majors_table}
    self.mydb = MySQLDB(**connectArgs)

    nodes, links = self.get_nodes_and_links()
    SankeyDiagram.plot_sankey(nodes, links, plot_title="Majors Transitions")
def log_into_mysql(self, user, db_pwd, db=None, host='localhost', **kwargs):
    """Log into MySQL and return the connection, with session time zone UTC.

    If the login fails because database ``db`` does not exist (MySQL
    error 1049), a connection to 'information_schema' is opened
    instead and the database named by
    ``self.config_info.canvas_db_aux`` is created.

    :param user: MySQL user under which to log in
    :param db_pwd: MySQL password for ``user``
    :param db: database to use after login
    :param host: MySQL server host
    :param kwargs: passed through to MySQLDB on the first login attempt
    :return: the open MySQLDB instance
    :raise DatabaseError: if the login fails for any other reason
    """
    try:
        # Try logging in, specifying the database in which all the tables
        # will be created:
        conn = MySQLDB(user=user, passwd=db_pwd, db=db, host=host, **kwargs)
    except ValueError as e:
        # Does the db not exist yet?
        if "OperationalError(1049," not in str(e):
            raise DatabaseError(f"Cannot open Canvas database:\n{repr(e)}") from e
        # Log in, specifying an always present db to 'use':
        conn = MySQLDB(user=user, passwd=db_pwd, db='information_schema', host=host)
        # Create the db.
        # NOTE(review): the connection stays on information_schema after
        # this; confirm callers select the new database themselves.
        conn.execute('CREATE DATABASE %s;' % self.config_info.canvas_db_aux)
    except Exception as e:
        raise DatabaseError(f"Cannot open Canvas database:\n{repr(e)}") from e

    # Work in UTC, b/c default on Mac MySQL 8 is local time,
    # on Centos MySQL 5.7 is UTC; it's a mess:
    (err, _warn) = conn.execute('SET @@session.time_zone = "+00:00"')
    if err is not None:
        self.log_warn(f"Cannot set session time zone to UTC: {repr(err)}")

    return conn
def __init__(self, user, pwd):
    """Connect to MySQL db 'Edx' and recreate an empty UserCountry table.

    :param user: MySQL user under which to log in
    :param pwd: MySQL password for ``user``
    """
    self.ipCountryXlater = IpCountryDict()
    self.user = user
    self.pwd = pwd
    self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')

    # Any existing UserCountry table is dropped, then
    # created afresh:
    createCmd = ('CREATE TABLE UserCountry ( '
                 'anon_screen_name varchar(40) NOT NULL DEFAULT "", '
                 'two_letter_country varchar(2) NOT NULL DEFAULT "", '
                 'three_letter_country varchar(3) NOT NULL DEFAULT "", '
                 'country varchar(255) NOT NULL DEFAULT "" '
                 ') ENGINE=MyISAM; ')
    self.db.dropTable('UserCountry')

    print("Creating table UserCountry...")
    self.db.execute(createCmd)
    print("Done creating table UserCountry.")
def testCreateTempTable(self):
    """A temporary table exists after creation and is gone in a new session."""
    mySchema = {
        'col1': 'INT',
        'col2': 'varchar(255)',
        'col3': 'FLOAT',
        'col4': 'TEXT',
        #'col5' : 'JSON'  # Only works MySQL 5.7 and up.
    }
    self.mysqldb.createTable('myTbl', mySchema, temporary=True)

    # Check that tbl exists.
    # NOTE: can't use query to mysql.informationschema,
    # b/c temp tables aren't listed there.
    try:
        # Will return some tuple; we don't
        # care what exactly, as long as the
        # cmd doesn't fail:
        self.mysqldb.query('DESC myTbl').next()
    except Exception:
        self.fail('Temporary table not found after creation.')

    # Start new session, which should remove the table:
    self.mysqldb.close()
    try:
        self.mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
    except ValueError as e:
        self.fail(str(e) + "Could not re-establish MySQL connection.")

    # Again via DESC, b/c temp tables aren't listed in the
    # information schema. The query is expected to fail now:
    try:
        self.mysqldb.query('DESC myTbl').next()
    except ValueError:
        pass
    else:
        self.fail("Temporary table did not disappear with session exit.")
def __init__(self, user, db, table, totalRows=None):
    '''
    Write one 'anon_screen_name,ip' line to /tmp/anonIps.csv for each
    not yet seen, non-NULL anon_screen_name in table EventIp. Rows are
    pulled in batches of UniqueAnonIpExtractor.BATCH_SIZE.

    The MySQL password is read from $HOME/.ssh/mysql. The process
    exits via sys.exit() if EventIp is empty. The database connection
    is closed before returning, also on error or exit.

    :param user: MySQL user under which to log in
    :param db: name of the MySQL database to connect to
    :param table: not used by this method; the queries name EventIp directly
    :param totalRows: maximum number of rows to process. None: all rows.
    '''
    home = os.environ['HOME']
    with open(os.path.join(home, '.ssh/mysql')) as pwdFd:
        pwd = pwdFd.read().strip()

    # Separate name for the connection, so that the 'db'
    # parameter is not overwritten:
    conn = MySQLDB(db=db, user=user, passwd=pwd)
    batchSize = UniqueAnonIpExtractor.BATCH_SIZE
    seenAnons = UniqueAnonIpExtractor.seenAnons
    try:
        # Number of rows pulled from EventIp:
        rowCount = 0
        # First row to get in the select statement:
        nextBatchStartRow = -batchSize
        with open('/tmp/anonIps.csv', 'w') as fd:
            #*****with sys.stdout as fd:
            fd.write('anon_screen_name,ip\n')
            numRecords = conn.query('SELECT count(*) from EventIp').next()
            if numRecords == 0:
                sys.exit()
            if totalRows is None:
                totalRows = numRecords
            while rowCount < numRecords and rowCount < totalRows:
                nextBatchStartRow += batchSize
                for (anon_screen_name, ip) in conn.query(
                        'SELECT anon_screen_name, event_ip from EventIp LIMIT %s,%s' %
                        (nextBatchStartRow, batchSize)):
                    if seenAnons.get(anon_screen_name, None) is None:
                        # The anon_screen_name in the db could actually be NULL, a.k.a. None.
                        # Ignore those:
                        if anon_screen_name is not None:
                            fd.write(anon_screen_name + ',' + ip + '\n')
                            seenAnons[anon_screen_name] = 1
                    rowCount += 1
                    if (rowCount % batchSize) == 0:
                        print("Did %s rows." % rowCount)
                    if rowCount >= totalRows:
                        break
        print('Finished %s rows; %s unique anon_screen_names' %
              (rowCount, len(seenAnons)))
    finally:
        # Previously the connection was never closed:
        conn.close()
def __init__(self,
             bsonFileName,
             mysqlDbObj=None,
             forumTableName='contents',
             allUsersTableName='EdxPrivate.UserGrade',
             anonymize=True,
             allowAnonScreenName=False):
    '''
    Given a .bson file containing OpenEdX Forum entries,
    anonymize the entries (if desired), and place them into a MySQL table.

    :param bsonFileName: full path to the .bson table. Set to None if instantiating
        for unit testing.
    :type bsonFileName: String
    :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized entries are
        to be placed. If None, a new such object is created into MySQL db 'EdxForum'
    :type mysqlDbObj: MySQLDB
    :param forumTableName: name of table into which anonymized Forum entries are to be placed
    :type forumTableName: String
    :param allUsersTableName: fully qualified name of table listing all in-the-clear
        names of users who post to the Forum. Used to redact their names from their
        own posts.
    :type allUsersTableName: String
    :param anonymize: If true, Forum post entries in the MySQL table will be anonymized
    :type anonymize: bool
    :param allowAnonScreenName: if True, then occurrences of poster's name in
        post bodies are replaced by <redacName_<anon_screen_name>>, where anon_screen_name
        is the hash used in other tables of the OpenEdX data.
    :type allowAnonScreenName: bool
    '''
    self.bsonFileName = bsonFileName
    self.forumTableName = forumTableName
    self.forumDbName = 'EdxForum'
    self.allUsersTableName = allUsersTableName
    self.anonymize = anonymize
    self.allowAnonScreenName = allowAnonScreenName

    if mysqlDbObj is not None:
        # Caller (e.g. a unit test) supplied the connection:
        self.mydb = mysqlDbObj
    else:
        # Regular run: log in as the user that started this process:
        self.mysql_passwd = self.getMySQLPasswd()
        self.mysql_dbhost = 'localhost'
        self.mysql_user = getpass.getuser()
        self.mydb = MySQLDB(user=self.mysql_user,
                            passwd=self.mysql_passwd,
                            db=self.forumDbName)

    self.counter = 0
    self.userCache = {}
    self.userSet = set()

    warnings.filterwarnings('ignore', category=MySQLdb.Warning)
    self.setupLogging()
    self.prepDatabase()
def setUp(self):
    """Reset the Mongo and MySQL test data and build three forum scrubbers.

    The scrubbers share one MySQL connection and differ only in their
    anonymization settings.
    """
    self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum")
    # Fill the little MongoDB with test JSON lines
    self.resetMongoTestDb()

    # The MySQLDB keyword for the login name is 'user', as in every
    # other MySQLDB call; 'mySQLUser' was a mistaken rename.
    self.mysqldb = MySQLDB(user='******', db='unittest')
    # Start with an empty result MySQL table for each test:
    self.mysqldb.dropTable('contents')
    # Fill the fake UserGrade table with records of course participants:
    self.resetMySQLUserListDb()

    # Instantiate a Forum scrubber without the
    # name of a bson file that contains forum
    # records. That 'None' for the bson file will
    # make the class understand that it's being
    # instantiated for a unit test.
    self.forumScrubberAnonymized = EdxForumScrubber(None,
                                                    mysqlDbObj=self.mysqldb,
                                                    forumTableName='contents',
                                                    allUsersTableName='unittest.UserGrade')
    self.forumScrubberRelatable = EdxForumScrubber(None,
                                                   mysqlDbObj=self.mysqldb,
                                                   forumTableName='contents',
                                                   allUsersTableName='unittest.UserGrade',
                                                   allowAnonScreenName=True)
    self.forumScrubberClear = EdxForumScrubber(None,
                                               mysqlDbObj=self.mysqldb,
                                               forumTableName='contents',
                                               allUsersTableName='unittest.UserGrade',
                                               anonymize=False)
def __init__(self, user, pwd):
    """Connect to MySQL db 'Edx' and (re)create the destination table.

    Any existing ``UserCountryTableCreator.DEST_TABLE`` is dropped
    and a fresh, empty one is created.

    :param user: MySQL user under which to log in
    :param pwd: MySQL password for ``user``
    """
    self.ipCountryXlater = IpCountryDict()
    self.user = user
    self.pwd = pwd
    self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
    self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
    # Build the schema from a list of pairs: an OrderedDict created
    # from a dict literal only keeps the column order on interpreters
    # whose plain dicts are already ordered.
    self.db.createTable(UserCountryTableCreator.DEST_TABLE,
                        OrderedDict([('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
                                     ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
                                     ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
                                     ('country', 'varchar(255) NOT NULL DEFAULT ""')]))
def setUp(self):
    """Create a CourseCSVServer in testing mode and connect to the 'unittest' db."""
    # Neither an application nor a request object is
    # supplied when testing:
    self.courseServer = CourseCSVServer(None, None, testing=True)
    try:
        self.mysqldb = MySQLDB(host="localhost",
                               port=3306,
                               user="******",
                               db="unittest")
    except ValueError as e:
        self.fail(
            str(e) +
            " (For unit testing, localhost MySQL server must have user 'unittest' without password, and a database called 'unittest')"
        )
def __init__(self, uid, pwd, db='Edx'):
    '''
    Make connection to MySQL wrapper, build the column list for
    the SELECT statement, and start processing via pullRowByRow().

    @param uid: MySQL user under which to log in. Assumed to be other than None
    @type uid: String
    @param pwd: MySQL password for user uid. May be None, in which
        case no password is passed to the MySQL wrapper.
    @type pwd: {String | None}
    @param db: name of the MySQL database to connect to
    @type db: String
    '''
    self.db = db

    if pwd is None:
        self.mysqldbStudModule = MySQLDB(user=uid, db=db)
    else:
        self.mysqldbStudModule = MySQLDB(user=uid, passwd=pwd, db=db)

    # Create a string with the parameters of the SELECT call,
    # (activity_grade_id,student_id,...):
    self.colSpec = ','.join(AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES)

    self.pullRowByRow()
def setUpClass(cls):
    """Read the test configuration and determine the unittest database name.

    Sets ``canvas_pwd_file``, ``utils``, ``test_host``,
    ``unittests_db_nm`` and ``user`` on the class.
    """
    super(CanvasUtilsTests, cls).setUpClass()

    # Read config file to see which MySQL server test_host we should
    # run the tests on. If setup.py does not exist, copy
    # setupSample.py to setup.py:
    config_info = ConfigInfo()
    test_host = config_info.test_default_host
    user = config_info.test_default_user
    cls.canvas_pwd_file = config_info.canvas_pwd_file

    # Access to common functionality:
    cls.utils = Utilities()

    # On localhost we expect a db 'Unittest'. On any other host,
    # ensure there is a unittest db for us to work in.
    # We'll delete it later:
    if test_host != 'localhost':
        db = MySQLDB(host=test_host,
                     user=config_info.test_default_user,
                     passwd=cls.utils.get_db_pwd(test_host, unittests=True))
        try:
            db_name = UnittestDbFinder(db).db_name
        except Exception as e:
            raise AssertionError(
                f"Cannot open db to find a unittest db: {repr(e)}")
        finally:
            db.close()
    else:
        db_name = 'Unittest'

    CanvasUtilsTests.test_host = test_host
    CanvasUtilsTests.unittests_db_nm = db_name
    CanvasUtilsTests.user = user
def testAddAnonToActivityTable(self):
    """Run AnonAndModIDAdder and compare the first three ActivityGrade rows."""
    # Rows expected in ActivityGrade, in query order:
    expectedRows = [
        (0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1,
         datetime.datetime(2014, 1, 10, 4, 10, 45),
         datetime.datetime(2014, 2, 10, 10, 14, 40),
         'modtype1', 'abc', 'Guided Walkthrough',
         'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'),
        (1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1,
         datetime.datetime(2014, 1, 10, 11, 30, 23),
         datetime.datetime(2014, 2, 10, 14, 30, 12),
         'modtype2', 'def', 'Evaluation',
         'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'),
        (2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1,
         datetime.datetime(2014, 1, 10, 18, 34, 12),
         datetime.datetime(2014, 2, 10, 19, 10, 33),
         'modtype2', 'None', 'Introduction',
         'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'),
    ]
    try:
        # Modify the fake courseware_studentmodule excerpt
        # to add anon_screen_name, computer plusses/minusses,
        # compute grade percentage, etc:
        AnonAndModIDAdder('unittest', '', db='unittest', testing=True)
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        rowIter = self.db.query('SELECT %s FROM ActivityGrade;' % self.allColNames)
        for rowNum, row in enumerate(rowIter):
            #print(row)
            if rowNum < len(expectedRows):
                self.assertEqual(expectedRows[rowNum], row)
    finally:
        self.db.close()
def __init__(self, user, pwd):
    """Connect to MySQL db 'Edx' and recreate the detailed location table.

    Any existing ``UserDetailedLocationTableCreator.DEST_TABLE`` is
    dropped and a fresh, empty one is created.

    :param user: MySQL user under which to log in
    :param pwd: MySQL password for ``user``
    """
    self.ipCountryXlater = IpCountryDict()
    self.user = user
    self.pwd = pwd
    self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')

    destTable = UserDetailedLocationTableCreator.DEST_TABLE
    createCmd = '''CREATE TABLE %s (
                     anon_screen_name varchar(40) NOT NULL DEFAULT "",
                     two_letter_country varchar(2) NOT NULL DEFAULT "",
                     three_letter_country varchar(3) NOT NULL DEFAULT "",
                     country varchar(255) NOT NULL DEFAULT "",
                     region varchar(255) NOT NULL DEFAULT "",
                     city varchar(255) NOT NULL DEFAULT "",
                     lat_long point NOT NULL
                     ) ENGINE=MyISAM;
                     ''' % destTable
    # Drop the table that is about to be created. The previous
    # hard-coded 'UserCountry' removed a different table and left
    # an existing destination table in the way of CREATE TABLE:
    self.db.dropTable(destTable)

    # '%s...' rather than '%...': the latter is an invalid format
    # specification and raised ValueError:
    print("Creating table %s..." % destTable)
    self.db.execute(createCmd)
    print("Done creating table %s." % destTable)
def setUpClass(cls):
    """Open a class-wide connection to 'information_schema' on the configured test host."""
    super(FindUnittestDbTester, cls).setUpClass()

    # Get whether to test on localhost, or on
    # remote host:
    config_info = ConfigInfo()
    cls.test_host = config_info.test_default_host
    cls.user = config_info.test_default_user

    utils = Utilities()
    connectArgs = {'user': cls.user,
                   'passwd': utils.get_db_pwd(cls.test_host, unittests=True),
                   'db': 'information_schema',
                   'host': cls.test_host}
    cls.db = MySQLDB(**connectArgs)
def testAddAnonToActivityTable(self):
    """Run AnonAndModIDAdder and compare the first three ActivityGrade rows."""
    # Rows expected in ActivityGrade, in query order:
    expectedRows = [
        (0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1,
         datetime.datetime(2014, 1, 10, 4, 10, 45),
         datetime.datetime(2014, 2, 10, 10, 14, 40),
         'modtype1', '', 'Guided Walkthrough',
         'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'),
        (1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1,
         datetime.datetime(2014, 1, 10, 11, 30, 23),
         datetime.datetime(2014, 2, 10, 14, 30, 12),
         'modtype2', '', 'Evaluation',
         'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'),
        (2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1,
         datetime.datetime(2014, 1, 10, 18, 34, 12),
         datetime.datetime(2014, 2, 10, 19, 10, 33),
         'modtype2', '', 'Introduction',
         'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'),
    ]
    try:
        # Modify the fake courseware_studentmodule excerpt
        # to add anon_screen_name, computer plusses/minusses,
        # compute grade percentage, etc:
        AnonAndModIDAdder('unittest', '', db='unittest')
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        rowIter = self.db.query('SELECT %s FROM ActivityGrade;' % self.allColNames)
        for rowNum, row in enumerate(rowIter):
            #print(row)
            if rowNum < len(expectedRows):
                self.assertEqual(expectedRows[rowNum], row)
    finally:
        self.db.close()
def setUp(self):
    """Reset the Mongo and MySQL test data and build three forum scrubbers.

    The scrubbers share one MySQL connection and differ only in their
    anonymization settings.
    """
    self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum")
    # Fill the little MongoDB with test JSON lines
    self.resetMongoTestDb()

    self.mysqldb = MySQLDB(user='******', db='unittest')
    # Start with an empty result MySQL table for each test:
    self.mysqldb.dropTable('contents')
    # Fill the fake UserGrade table with records of course participants:
    self.resetMySQLUserListDb()

    # Arguments common to all three scrubbers:
    sharedArgs = dict(mysqlDbObj=self.mysqldb,
                      forumTableName='contents',
                      allUsersTableName='unittest.UserGrade')

    # Instantiate the Forum scrubbers without the
    # name of a bson file that contains forum
    # records. That 'None' for the bson file will
    # make the class understand that it's being
    # instantiated for a unit test.
    self.forumScrubberAnonymized = EdxForumScrubber(None, **sharedArgs)
    self.forumScrubberRelatable = EdxForumScrubber(None,
                                                   allowAnonScreenName=True,
                                                   **sharedArgs)
    self.forumScrubberClear = EdxForumScrubber(None,
                                               anonymize=False,
                                               **sharedArgs)
def __init__(self, topic=None, user='******', passwd=''):
    '''
    Connect to MySQL db 'Edx', subscribe to a bus topic, and then
    wait for messages on that topic forever. This constructor does
    not return.

    :param topic: bus topic to subscribe to. None: use
        CoursesGivenQuarter.module_topic
    :param user: MySQL user under which to log in
    :param passwd: MySQL password for ``user``
    '''
    if topic is None:
        topic = CoursesGivenQuarter.module_topic

    self.mysqldb = MySQLDB(host='127.0.0.1',
                           port=CoursesGivenQuarter.MYSQL_PORT_LOCAL,
                           user=user,
                           passwd=passwd,
                           db='Edx')

    # The following statement is needed only
    # if your callback is a method (rather than a top
    # level function). That's because Python methods
    # take 'self' as a first argument, while the Bus
    # expects a function that just takes topicName, msgText, and msgOffset.
    # The following statement creates a function wrapper around
    # our callback method:
    self.requestDeliveryMethod = functools.partial(self.requestCoursesForQuarter)

    # Create a BusAdapter instance:
    self.bus = BusAdapter()

    # Tell the bus that we are interested in the topic,
    # and want callbacks to self.requestDeliveryMethod whenever
    # a message arrives:
    self.bus.subscribeToTopic(topic, self.requestDeliveryMethod)

    # Wait on the topic we subscribed to. Previously this always
    # waited on the class default topic, even when the caller
    # passed a different one:
    while True:
        self.bus.waitForMessage(topic)
def get_mysql_version(cls):
    '''
    Return a tuple: (major, minor). Example, for MySQL 5.7.15,
    return (5,7). Return (None,None) if version number not found.

    The version is taken from the output of '<mysql client> --version'.
    '''
    # Where is mysql client program?
    mysql_path = MySQLDB.find_mysql_path()

    # Get version string, which looks like this:
    #   'Distrib 5.7.15, for osx10.11 (x86_64) using EditLine wrapper\n'
    version_str = subprocess.check_output([mysql_path, '--version']).decode('utf-8')

    # Isolate the major and minor version numbers (e.g. '5', and '7').
    # Require at least one digit in each group: with '*' a bare '..'
    # anywhere in the output matched with empty groups, and int('')
    # then raised ValueError:
    pat = re.compile(r'([0-9]+)[.]([0-9]+)[.]')
    match_obj = pat.search(version_str)
    if match_obj is None:
        return (None, None)
    (major, minor) = match_obj.groups()
    return (int(major), int(minor))
def tearDownClass(cls):
    """Drop the unittest database, unless the tests ran on localhost."""
    super(AuxTableCopyTester, cls).tearDownClass()
    if cls.test_host == 'localhost':
        return

    db = None
    try:
        # Remove the unittest db we created:
        print(f"Removing database '{cls.db_name}'...")
        db = MySQLDB(user=cls.user,
                     passwd=cls.mysql_pwd,
                     db='information_schema',
                     host=cls.test_host)
        db.execute(f"DROP DATABASE {cls.db_name}")
        # Single print(): the nested print(print(...)) additionally
        # wrote a stray 'None' line:
        print(f"Done removing database '{cls.db_name}'...")
        #AuxTableCopyTester.copier_obj.close()
    finally:
        if db is not None:
            db.close()
def log_into_mysql(cls, user, db_pwd, db=None):
    """Log into MySQL on the test host and return the connection.

    If the login fails because the requested database does not exist
    (MySQL error 1049), log in without a database and create
    database 'unittest'.

    :param user: MySQL user under which to log in
    :param db_pwd: MySQL password for ``user``
    :param db: database to use after login
    :return: the open MySQLDB instance
    :raise RuntimeError: if the login fails for any other reason
    """
    host = AuxTableCopyTester.test_host
    try:
        # Try logging in, specifying the database in which all the tables
        # will be created:
        connection = MySQLDB(user=user, passwd=db_pwd, db=db, host=host)
    except ValueError as e:
        # Anything but 'unknown database' is fatal:
        if "OperationalError(1049," not in str(e):
            raise RuntimeError("Cannot open Canvas database: %s" % repr(e))
        # Log in without specifying a db to 'use':
        connection = MySQLDB(user=user, passwd=db_pwd, host=host)
        # Create the db:
        connection.execute('CREATE DATABASE %s;' % 'unittest')
    except Exception as e:
        raise RuntimeError("Cannot open Canvas database: %s" % repr(e))

    return connection
class TestAddAnonToActivityGrade(unittest.TestCase):
    """Test AnonAndModIDAdder against a small fake StudentmoduleExcerpt table.

    Requires a local MySQL server with a database 'unittest'.
    """

    # Schema shared by the StudentmoduleExcerpt and ActivityGrade test
    # tables. Built from a list of pairs: an OrderedDict created from a
    # dict literal only keeps the column order on interpreters whose
    # plain dicts are already ordered.
    studentmoduleExcerptSchema = OrderedDict([
        ('activity_grade_id', 'INT'),
        ('student_id', 'INT'),
        ('course_display_name', 'VARCHAR(255)'),
        ('grade', 'VARCHAR(5)'),
        ('max_grade', 'DOUBLE'),
        ('percent_grade', 'DOUBLE'),
        ('parts_correctness', 'VARCHAR(255)'),
        ('answers', 'VARCHAR(255)'),
        ('num_attempts', 'INT'),
        ('first_submit', 'DATETIME'),
        ('last_submit', 'DATETIME'),
        ('module_type', 'VARCHAR(255)'),
        ('anon_screen_name', 'VARCHAR(40)'),
        ('resource_display_name', 'VARCHAR(255)'),
        ('module_id', 'VARCHAR(255)'),
    ])

    # Column names, in the same order as the schema above:
    studentmoduleExcerptColNames = [
        'activity_grade_id',
        'student_id',
        'course_display_name',
        'grade',
        'max_grade',
        'percent_grade',
        'parts_correctness',
        'answers',
        'num_attempts',
        'first_submit',
        'last_submit',
        'module_type',
        'anon_screen_name',
        'resource_display_name',
        'module_id'
    ]

    # JSON state strings placed in the parts_correctness column of the test rows:
    state1 = ' {"correct_map": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {"hint": "", "hintmode": null, "correctness": "correct", "npoints": null, "msg": "", "queuestate": null}}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}, "attempts": 1, "seed": 1, "done": true, "student_answers": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": "choice_1"}} '
    state2 = '{"correct_map": {}, "seed": 1, "student_answers": {}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}}'
    state3 = '{"position": 1}'

    # Module ids of the three test rows:
    modid1 = 'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'
    modid2 = 'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'
    modid3 = 'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'

    # Rows inserted into StudentmoduleExcerpt before each test:
    studentmoduleExcerptValues = \
        [
            [0, 1, 'myCourse', 3, 10, -1.0, state1, '', -1, '2014-01-10 04:10:45', '2014-02-10 10:14:40', 'modtype1', '', '', modid1],
            [1, 2, 'myCourse', 5, 10, -1.0, state2, '', -1, '2014-01-10 11:30:23', '2014-02-10 14:30:12', 'modtype2', '', '', modid2],
            [2, 3, 'yourCourse', 8, 10, -1.0, state3, '', -1, '2014-01-10 18:34:12', '2014-02-10 19:10:33', 'modtype2', '', '', modid3]
        ]

    def setUp(self):
        """Recreate and fill StudentmoduleExcerpt; ensure an empty ActivityGrade."""
        self.allColNames = ','.join(TestAddAnonToActivityGrade.studentmoduleExcerptColNames)

        self.db = MySQLDB(user='******', passwd='', db='unittest')
        self.db.dropTable('StudentmoduleExcerpt')
        self.db.createTable('StudentmoduleExcerpt',
                            TestAddAnonToActivityGrade.studentmoduleExcerptSchema,
                            temporary=False)
        #***temporary=True)
        self.db.bulkInsert('StudentmoduleExcerpt',
                           TestAddAnonToActivityGrade.studentmoduleExcerptColNames,
                           TestAddAnonToActivityGrade.studentmoduleExcerptValues)

        self.db.createTable('ActivityGrade', TestAddAnonToActivityGrade.studentmoduleExcerptSchema)
        # Make sure there isn't left over content (if the table existed):
        self.db.truncateTable('ActivityGrade')
        self.db.close()

    def tearDown(self):
        """Open and close a connection; the tables are left in place."""
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        # Can't drop tables: hangs
        #self.db.dropTable('StudentmoduleExcerpt')
        #self.db.dropTable('ActivityGrade')
        self.db.close()

    def testAddAnonToActivityTable(self):
        """Run AnonAndModIDAdder and compare the first three ActivityGrade rows."""
        # Rows expected in ActivityGrade, in query order:
        expectedRows = [
            (0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1,
             datetime.datetime(2014, 1, 10, 4, 10, 45),
             datetime.datetime(2014, 2, 10, 10, 14, 40),
             'modtype1', '', 'Guided Walkthrough',
             'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'),
            (1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1,
             datetime.datetime(2014, 1, 10, 11, 30, 23),
             datetime.datetime(2014, 2, 10, 14, 30, 12),
             'modtype2', '', 'Evaluation',
             'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'),
            (2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1,
             datetime.datetime(2014, 1, 10, 18, 34, 12),
             datetime.datetime(2014, 2, 10, 19, 10, 33),
             'modtype2', '', 'Introduction',
             'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'),
        ]
        try:
            # Modify the fake courseware_studentmodule excerpt
            # to add anon_screen_name, computer plusses/minusses,
            # compute grade percentage, etc:
            AnonAndModIDAdder('unittest', '', db='unittest')
            self.db = MySQLDB(user='******', passwd='', db='unittest')
            for rowNum, row in enumerate(self.db.query('SELECT %s FROM ActivityGrade;' % self.allColNames)):
                #print(row)
                if rowNum < len(expectedRows):
                    self.assertEqual(expectedRows[rowNum], row)
        finally:
            self.db.close()
def setUpClass(cls):
    '''
    One-time check that the local MySQL server is usable for these tests.

    Records the outcome in TestPymysqlUtils.env_ok / TestPymysqlUtils.err_msg
    rather than raising, and stores the MySQL version to test against
    in TestPymysqlUtils.major / TestPymysqlUtils.minor.

    The version lookup is skipped (early return) if the grant check
    finds a missing permission.

    :raise OSError: if the MySQL version number cannot be obtained.
    '''
    # Ensure that a user unittest with the proper
    # permissions exists in the db:
    TestPymysqlUtils.env_ok = True
    TestPymysqlUtils.err_msg = ''

    needed_grants = ['SELECT', 'INSERT', 'UPDATE', 'DELETE',
                     'CREATE', 'CREATE TEMPORARY TABLES',
                     'DROP', 'ALTER']
    grants_csv = ','.join(needed_grants)

    try:
        mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
        grant_query = 'SHOW GRANTS FOR unittest@localhost'
        query_it = mysqldb.query(grant_query)

        # First row of the SHOW GRANTS response should be
        # one of:
        first_grants = ["GRANT USAGE ON *.* TO 'unittest'@'localhost'",
                        "GRANT USAGE ON *.* TO `unittest`@`localhost`"
                        ]
        # Second row depends on the order in which the
        # grants were provided. The row will look something
        # like:
        #   GRANT SELECT, INSERT, UPDATE, DELETE, ..., CREATE, DROP, ALTER ON `unittest`.* TO 'unittest'@'localhost'
        # Verify:
        usage_grant = query_it.next()
        if usage_grant not in first_grants:
            TestPymysqlUtils.err_msg = '''
    User 'unittest' is missing USAGE grant needed to run the tests.
    Also need this in your MySQL:

        %s
    ''' % ('GRANT %s ON unittest.* TO unittest@localhost' % grants_csv)
            TestPymysqlUtils.env_ok = False
            return

        grants_str = query_it.next()
        for needed_grant in needed_grants:
            if needed_grant not in grants_str:
                TestPymysqlUtils.err_msg = '''
    User 'unittest' does not have the '%s' permission needed to run the tests.
    Need this in your MySQL:

        %s
    ''' % (needed_grant,
           'GRANT %s ON unittest.* TO unittest@localhost;' % grants_csv)
                TestPymysqlUtils.env_ok = False
                return

    except (ValueError, RuntimeError):
        TestPymysqlUtils.err_msg = '''
    For unit testing, localhost MySQL server must have
    user 'unittest' without password, and a database
    called 'unittest'. To create these prerequisites
    in MySQL:

        CREATE USER unittest@localhost;
        CREATE DATABASE unittest;

    This user needs permissions:

        %s
    ''' % ('GRANT %s ON unittest.* TO unittest@localhost;' % grants_csv)
        TestPymysqlUtils.env_ok = False

    # Check MySQL version:
    try:
        (major, minor) = TestPymysqlUtils.get_mysql_version()
    except Exception as e:
        raise OSError('Could not get mysql version number: %s' % str(e))

    known_versions = [(5, 6), (5, 7), (8, 0)]
    if major is None:
        print('Warning: MySQL version number not found; testing as if V5.7')
        TestPymysqlUtils.major = 5
        TestPymysqlUtils.minor = 7
    elif (major, minor) not in known_versions:
        # The version numbers must actually be interpolated; the
        # message used to be printed with raw '%s.%s' placeholders.
        print('Warning: MySQL version is %s.%s; but testing as if V5.7' % (major, minor))
        TestPymysqlUtils.major = 5
        TestPymysqlUtils.minor = 7
    else:
        TestPymysqlUtils.major = major
        TestPymysqlUtils.minor = minor
class AnonAndModIDAdder(object):
    '''
    Reads rows from table StudentmoduleExcerpt, fills in the derived
    columns (resource_display_name, anon_screen_name, percent_grade,
    parts_correctness, answers, num_attempts), and bulk-inserts the
    results into table ActivityGrade.

    All work is triggered by instantiation: __init__() builds the
    student-id to anon_screen_name cache and then processes every row.
    '''

    # Number of rows to process in memory
    # before writing to ActivityGrade:
    BATCH_SIZE = 10000

    # For explanation of the following regex patterns,
    # see header comment of parseStateJSON:
    SOLUTION_RESULT_PATTERN = re.compile(r'[^"]*correctness": "([^"]*)')
    SOLUTION_ANSWERS_PATTERN = re.compile(r'[^:]*: "([^"]*)"')

    ACTIVITY_GRADE_COL_NAMES = [
        'activity_grade_id',
        'student_id',
        'course_display_name',
        'grade',
        'max_grade',
        'percent_grade',
        'parts_correctness',
        'answers',
        'num_attempts',
        'first_submit',
        'last_submit',
        'module_type',
        'anon_screen_name',
        'resource_display_name',
        'module_id'
    ]

    # Indices into tuples from StudentmoduleExcerpt:
    STUDENT_INT_ID_INDEX = 1
    GRADE_INDEX = 3
    MAX_GRADE_INDEX = 4
    PERCENT_GRADE_INDEX = 5
    PARTS_CORRECTNESS_INDEX = 6
    ANSWERS_INDEX = 7
    NUM_ATTEMPTS_INDEX = 8
    ANON_SCREEN_NAME_INDEX = 12
    RESOURCE_DISPLAY_NAME_INDEX = 13
    MODULE_ID_INDEX = 14

    def __init__(self, uid, pwd, db='Edx', testing=False):
        '''
        Connect to MySQL, build the student-id to anon_screen_name
        cache, and transfer all rows into ActivityGrade.

        @param uid: MySQL user under which to log in. Assumed to be other than None
        @type uid: String
        @param pwd: MySQL password for user uid. May be None.
        @type pwd: {String | None}
        @param db: MySQL database to connect to. If 'unittest', rows
            are pulled from unittest.StudentmoduleExcerpt.
        @type db: String
        @param testing: passed through to cacheIdInt2Anon()
        @type testing: boolean
        '''
        self.db = db
        if pwd is None:
            self.mysqldbStudModule = MySQLDB(user=uid, db=db)
        else:
            self.mysqldbStudModule = MySQLDB(user=uid, passwd=pwd, db=db)

        # Create a string with the parameters of the SELECT call,
        # (activity_grade_id,student_id,...):
        self.colSpec = ','.join(AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES)

        self.cacheIdInt2Anon(testing)
        self.pullRowByRow()

    def cacheIdInt2Anon(self, testing=False):
        '''
        Builds a dict to map platform integers to anon_screen_names.

        :param testing: If set true, then all tables are assumed to be in MySQL DB unittest.
        :type testing: boolean
        '''
        self.int2AnonCache = {}
        if testing:
            queryIt = self.mysqldbStudModule.query("SELECT student_id AS user_int_id, \
                                                   unittest.UserGrade.anon_screen_name \
                                                   FROM unittest.StudentmoduleExcerpt LEFT JOIN unittest.UserGrade \
                                                   ON unittest.StudentmoduleExcerpt.student_id = unittest.UserGrade.user_int_id;")
        else:
            queryIt = self.mysqldbStudModule.query("SELECT student_id AS user_int_id, \
                                                   EdxPrivate.UserGrade.anon_screen_name \
                                                   FROM edxprod.StudentmoduleExcerpt LEFT JOIN EdxPrivate.UserGrade \
                                                   ON edxprod.StudentmoduleExcerpt.student_id = EdxPrivate.UserGrade.user_int_id;")
        for user_int_id, anon_screen_name in queryIt:
            self.int2AnonCache[user_int_id] = anon_screen_name

    def pullRowByRow(self):
        '''
        Pull every row from StudentmoduleExcerpt, fill in the derived
        columns, and write the rows to ActivityGrade in batches of
        BATCH_SIZE.
        '''
        if self.db == 'unittest':
            queryIt = self.mysqldbStudModule.query("SELECT %s FROM unittest.StudentmoduleExcerpt;" % self.colSpec)
        else:
            theQuery = "SELECT activity_grade_id,student_id,\
                        course_display_name,grade,max_grade,percent_grade,\
                        parts_correctness,answers,num_attempts,first_submit,\
                        last_submit,module_type,anon_screen_name,\
                        resource_display_name,module_id \
                        FROM edxprod.StudentmoduleExcerpt \
                        WHERE isTrueCourseName(course_display_name) = 1;"
            queryIt = self.mysqldbStudModule.query(theQuery)

        rowBatch = []
        for studmodTuple in queryIt:
            rowBatch.append(self._transformRow(studmodTuple))
            if len(rowBatch) >= AnonAndModIDAdder.BATCH_SIZE:
                self.mysqldbStudModule.bulkInsert('ActivityGrade', AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES, rowBatch)
                rowBatch = []
        # Flush whatever is left of the last, partial batch:
        if rowBatch:
            self.mysqldbStudModule.bulkInsert('ActivityGrade', AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES, rowBatch)

    def _transformRow(self, studmodTuple):
        '''
        Return one StudentmoduleExcerpt row as a list with all derived
        columns filled in.
        '''
        # Results return as tuples, but we need to change tuple items by index.
        # So must convert to list:
        row = list(studmodTuple)

        # Resolve the module_id into a human readable resource_display_name:
        moduleID = row[AnonAndModIDAdder.MODULE_ID_INDEX]
        row[AnonAndModIDAdder.RESOURCE_DISPLAY_NAME_INDEX] = self.getResourceDisplayName(moduleID)

        # Compute the anon_screen_name. A student id that is missing
        # from the cache raises KeyError; an unhashable one TypeError.
        # Either way there is no anon name to fill in:
        studentIntId = row[AnonAndModIDAdder.STUDENT_INT_ID_INDEX]
        try:
            row[AnonAndModIDAdder.ANON_SCREEN_NAME_INDEX] = self.int2AnonCache[studentIntId]
        except (KeyError, TypeError):
            row[AnonAndModIDAdder.ANON_SCREEN_NAME_INDEX] = ''

        # Pick grade and max_grade out of the row,
        # compute the percentage, and place that
        # back into the row:
        row[AnonAndModIDAdder.PERCENT_GRADE_INDEX] = AnonAndModIDAdder._computePercentGrade(
            row[AnonAndModIDAdder.GRADE_INDEX],
            row[AnonAndModIDAdder.MAX_GRADE_INDEX])

        # Parse 'state' column from JSON and put result into plusses/minusses column:
        (partsCorrectness, answers, numAttempts) = \
            self.parseStateJSON(row[AnonAndModIDAdder.PARTS_CORRECTNESS_INDEX])

        row[AnonAndModIDAdder.PARTS_CORRECTNESS_INDEX] = partsCorrectness
        row[AnonAndModIDAdder.ANSWERS_INDEX] = ','.join(answers)
        row[AnonAndModIDAdder.NUM_ATTEMPTS_INDEX] = numAttempts
        return row

    @staticmethod
    def _computePercentGrade(grade, max_grade):
        '''
        Return grade as a percentage of max_grade, rounded to two
        decimals and converted to a string; 'NULL' if the percentage
        cannot be computed (non-numeric or missing values, or a
        max_grade of zero).
        '''
        try:
            return str(round((int(grade) * 100.0 / int(max_grade)), 2))
        except (TypeError, ValueError, ZeroDivisionError):
            return 'NULL'

    def getResourceDisplayName(self, moduleID):
        '''
        Return the module name that Utils.getModuleNameFromID()
        resolves for the given module ID.
        '''
        moduleName = Utils.getModuleNameFromID(moduleID)
        return moduleName

    def parseStateJSON(self, jsonStateStr, srcTableName='courseware_studentmodule'):
        '''
        Given the 'state' column from a courseware_studentmodule column,
        return a 3-tuple: (plusMinusStr, answersArray, numAttempts)
        The plusMinusStr will be a string of '+' and '-'. A
        plus means that the problem solution part of an assignment
        submission was correct; a '-' means it was incorrect. The
        plus/minus indicators are arranged in the order of the problem
        subparts; like '++-' for a three-part problem in which the student
        got the first two correct, the last one incorrect.

        The answersArray will be an array of answers to the corresponding
        problems, like ['choice_0', 'choice_1'].

        Input for a problem solution with two parts looks like this::

            {
              "correct_map": {
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": {
                  "hint": "",
                  "hintmode": null,
                  "correctness": "correct",
                  "npoints": null,
                  "msg": "",
                  "queuestate": null
                },
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": {
                  "hint": "",
                  "hintmode": null,
                  "correctness": "correct",
                  "npoints": null,
                  "msg": "",
                  "queuestate": null
                }
              },
              "input_state": {
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": {

                },
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": {

                }
              },
              "attempts": 3,
              "seed": 1,
              "done": true,
              "student_answers": {
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": "choice_3",
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": "choice_0"
              }
            }

        This structure is ugly enough even when imported into a dict
        via json.loads() that a regular expression solution is faster.
        Two regexes are used:

          - SOLUTION_RESULT_PATTERN  = re.compile(r'[^"]*correctness": "([^"]*)')
              looks for the correctness entries: 'correct', 'incorrect'.
              First the regex throws away front parts of the JSON that do
              not consist of 'correctness": '. That's the
              '[^"]*correctness": "' part of the regex.
              Next, a capture group grabs all letters that are not a
              double quote. That's the '([^"]*)' part of the regex.
              Those capture groups will contain the words 'correct' or
              'incorrect'.

          - SOLUTION_ANSWERS_PATTERN = re.compile(r'[^:]*: "([^"]*)"')
              looks for the answers themselves: 'choice_0', etc. This
              pattern assumes that we first cut off from the JSON all
              the front part up to 'student_answers":'. The regex
              operates over the rest: the '[^:]*: "' skips over all
              text up to the next colon, followed by a space and
              opening double quote. The capture group grabs the
              answer, as in 'choice_0'.

        @param jsonStateStr: content of the 'state' column
        @type jsonStateStr: String
        @param srcTableName: not used by this method
        @type srcTableName: String
        @return: plus/minus string, array of participant's answers, number of attempts.
                 If number of attempts is -1 the row was not a problem statement,
                 or number of attempts was otherwise unavailable.
        @rtype: (string, [string], int)
        '''
        successResults = ''
        # The following badAnswers array is filled with
        # just the wrong answers. It's maintained, but
        # not currently returned, b/c users didn't feel
        # they needed it.
        badAnswers = []
        answers = []
        numAttempts = -1

        # Many state entries are not student problem result
        # submissions, but of the form "{'position': 4}".
        # Weed those out:
        if jsonStateStr.find('correct_map') == -1:
            return (successResults, answers, numAttempts)

        # Get the ['correct','incorrect',...] array;
        # we'll use it later on:
        allSolutionResults = AnonAndModIDAdder.SOLUTION_RESULT_PATTERN.findall(jsonStateStr)

        # Next, get all the answers themselves.
        # Chop off all the JSON up to 'student_answers":':
        chopTxtMarker = 'student_answers":'
        chopPos = jsonStateStr.find(chopTxtMarker)
        if chopPos == -1:
            # Couldn't find the student answers; fine:
            return (successResults, answers, numAttempts)

        # Get left with str starting at '{' in
        # "student_answers": {
        #    "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": "choice_3",
        #    "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": "choice_0"
        restJSON = jsonStateStr[chopPos + len(chopTxtMarker):]
        # ... and put the regex to work:
        answers = AnonAndModIDAdder.SOLUTION_ANSWERS_PATTERN.findall(restJSON)

        # Find number of attempts:
        # Find '"attempts": 3,...':
        chopTxtMarker = '"attempts": '
        chopPos = jsonStateStr.find(chopTxtMarker)
        # find() signals 'not found' with -1; position 0 is a valid hit:
        if chopPos != -1:
            upToNum = jsonStateStr[chopPos + len(chopTxtMarker):]
            try:
                # The 'str' part of 'str(upToNum)' is needed b/c
                # the JSON is unicode, and isdigit() barfs when given
                # unicode:
                numAttempts = int("".join(itertools.takewhile(str.isdigit, str(upToNum))))
            except ValueError:
                # Couldn't find the number of attempts.
                # Just punt.
                pass
            except TypeError:
                # Unicode garbage, clearly not a digit
                pass

        # Go through the ['correct','incorrect',...] array,
        # and take two actions: if correct, add a '+' to
        # the successResults str; if 'incorrect' then add
        # a '-' to successResults, and transfer the 'bad'
        # answer to the badAnswers array:
        for (i, correctness) in enumerate(allSolutionResults):
            if correctness == 'correct':
                successResults += '+'
            else:
                successResults += '-'
                try:
                    badAnswers.append(answers[i])
                except IndexError:
                    badAnswers.append('<unknown>')

        return (successResults, answers, numAttempts)
class EdxForumScrubber(object):
    '''
    Given a .bson file of OpenEdX Forum posts, load the file
    into a MongoDB. Then pull a post at a time, anonymize, and
    insert a selection of fields into a MySQL db. The MongoDb
    entries look like this::

        {
            "_id" : ObjectId("51b75a48f359c40a00000028"),
            "_type" : "Comment",
            "abuse_flaggers" : [ ],
            "anonymous" : false,
            "anonymous_to_peers" : false,
            "at_position_list" : [ ],
            "author_id" : "26344",
            "author_username" : "Minelly48",
            "body" : "I am Gwen.I am a nursing professor who took statistics many years ago and want to refresh my knowledge.",
            "comment_thread_id" : ObjectId("51b754e5f359c40a0000001d"),
            "course_id" : "Medicine/HRP258/Statistics_in_Medicine",
            "created_at" : ISODate("2013-06-11T17:11:36.831Z"),
            "endorsed" : false,
            "historical_abuse_flaggers" : [ ],
            "parent_ids" : [ ],
            "updated_at" : ISODate("2013-06-11T17:11:36.831Z"),
            "visible" : true,
            "votes" : {
                "count" : 2,
                "down" : [ ],
                "down_count" : 0,
                "point" : 2,
                "up" : [
                    "40325",
                    "20323"
                ],
                "up_count" : 2
            },
            "sk" : "51b75a48f359c40a00000028"
        }

    Depending on parameter allowAnonScreenName in the __init__() method,
    forum entries in the relational database will be associated with the
    same hash that is used to anonymize other parts of the OpenEdX data.
    '''

    LOG_DIR = '/home/dataman/Data/EdX/NonTransformLogs'

    # Pattern for email id - strings of alphabets/numbers/dots/hyphens followed
    # by an @ or at followed by combinations of dot/. followed by the edu/com
    # also, allow for spaces.
    # Written as a raw string so that the regex escapes (\s, \(, \., \-)
    # are not interpreted as (invalid) Python string escapes; the
    # resulting pattern text is unchanged.
    emailPattern = r'(.*)\s+([a-zA-Z0-9\(\.\-]+)[@]([a-zA-Z0-9\.]+)(.)(edu|com)\s*(.*)'
    compiledEmailPattern = re.compile(emailPattern)

    # Pattern for finding embedded double quotes in post bodies,
    # together with any run of backslashes directly in front of
    # them. The {0,} means zero or more backslashes, so that a
    # double quote at the very start of a string (no preceding
    # character at all) is matched as well:
    doublQuoteReplPattern = re.compile(r'[\\]{0,}"')

    # Schema of EdxForum.contents: an ordered dict that is
    # used twice: the table creation MySQL command is constructed
    # from this dict, and the dict is used to ensure that
    # all its keys (i.e. future column names) are present
    # in each MongoDB object. See also createForumTable().
    # In createForumTable() either entry anon_screen_name,
    # or screen_name in the dict below will be deleted, based
    # on whether we are asked to anonymize or not:
    forumSchema = OrderedDict()
    forumSchema['forum_post_id'] = "varchar(40) NOT NULL DEFAULT 'unavailable'"
    forumSchema['anon_screen_name'] = "varchar(40) NOT NULL DEFAULT 'anon_screen_name_redacted'"  # This or next deleted based on anonymize yes/no
    forumSchema['screen_name'] = "varchar(40) NOT NULL DEFAULT 'anon_screen_name_redacted'"       # This or prev deleted based on anonymize yes/no
    forumSchema['type'] = "varchar(20) NOT NULL"
    forumSchema['anonymous'] = "varchar(10) NOT NULL"
    forumSchema['anonymous_to_peers'] = "varchar(10) NOT NULL"
    forumSchema['at_position_list'] = "varchar(200) NOT NULL"
    forumSchema['forum_uid'] = "varchar(40) NOT NULL"
    forumSchema['body'] = "TEXT NOT NULL"  # "varchar(2500) NOT NULL"
    forumSchema['course_display_name'] = "varchar(100) NOT NULL"
    forumSchema['created_at'] = "datetime NOT NULL"
    forumSchema['votes'] = "TEXT NOT NULL"  # "varchar(200) NOT NULL"
    forumSchema['count'] = "int(11) NOT NULL"
    forumSchema['down_count'] = "int(11) NOT NULL"
    forumSchema['up_count'] = "int(11) NOT NULL"
    forumSchema['up'] = "varchar(200) DEFAULT NULL"
    forumSchema['down'] = "varchar(200) DEFAULT NULL"
    forumSchema['comment_thread_id'] = "varchar(255) DEFAULT NULL"
    forumSchema['parent_id'] = "varchar(255) DEFAULT NULL"
    forumSchema['parent_ids'] = "varchar(255) DEFAULT NULL"
    forumSchema['sk'] = "varchar(255) DEFAULT NULL"
    forumSchema['confusion'] = "varchar(20) NOT NULL DEFAULT ''"
    forumSchema['happiness'] = "varchar(20) NOT NULL DEFAULT ''"

    def __init__(self,
                 bsonFileName,
                 mysqlDbObj=None,
                 forumTableName='contents',
                 allUsersTableName='EdxPrivate.UserGrade',
                 anonymize=True,
                 allowAnonScreenName=False):
        '''
        Given a .bson file containing OpenEdX Forum entries,
        anonymize the entries (if desired), and place them into a MySQL table.

        :param bsonFileName: full path the .bson table. Set to None if instantiating
            for unit testing.
        :type bsonFileName: String
        :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized entries are
            to be placed. If None, a new such object is created into MySQL db 'EdxForum'
        :type mysqlDbObj: MySQLDB
        :param forumTableName: name of table into which anonymized Forum entries are to be placed
        :type forumTableName: String
        :param allUsersTableName: fully qualified name of table listing all in-the-clear mySQLUser names
            of users who post to the Forum. Used to redact their names from their own posts.
        :type allUsersTableName: String
        :param anonymize: If true, Forum post entries in the MySQL table will be anonymized
        :type anonymize: bool
        :param allowAnonScreenName: if True, then occurrences of poster's name in
            post bodies are replaced by <redacName_<anon_screen_name>>, where anon_screen_name
            is the hash used in other tables of the OpenEdX data.
        :type allowAnonScreenName: Bool
        '''
        self.bsonFileName = bsonFileName
        self.forumTableName = forumTableName
        self.forumDbName = 'EdxForum'
        self.allUsersTableName = allUsersTableName
        self.anonymize = anonymize
        self.allowAnonScreenName = allowAnonScreenName

        # If not unittest, but regular run, then mysqlDbObj is None
        if mysqlDbObj is None:
            self.mysql_passwd = self.getMySQLPasswd()
            self.mysql_dbhost = 'localhost'
            self.mysql_user = getpass.getuser()  # mySQLUser that started this process
            self.mydb = MySQLDB(user=self.mysql_user, passwd=self.mysql_passwd, db=self.forumDbName)
        else:
            self.mydb = mysqlDbObj

        self.counter = 0

        self.userCache = {}
        self.userSet = set()

        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
        self.setupLogging()
        self.prepDatabase()

    def runConversion(self):
        '''
        Do the actual work. We don't call this method from __init__()
        so that unittests can create an EdxForumScrubber instance without
        doing the actual work. Instead, unittests call individual methods.
        '''
        self.populateUserCache()

        self.mongo_database_name = 'TmpForum'
        self.collection_name = 'contents'

        # Load bson file into Mongodb:
        self.loadForumIntoMongoDb(self.bsonFileName)
        self.mongodb = MongoDB(dbName=self.mongo_database_name, collection=self.collection_name)

        # Anonymize each forum record, and transfer to MySQL db.
        # The destination is the table that prepDatabase() created,
        # i.e. the forumTableName passed to __init__():
        self.forumMongoToRelational(self.mongodb, self.mydb, self.forumTableName)

        self.mydb.close()
        self.mongodb.close()
        self.logInfo('Entered %d records into %s' % (self.counter, self.forumDbName + '.' + self.forumTableName))

    def loadForumIntoMongoDb(self, bsonFilename):
        '''
        Empty the MongoDB collection named by self.collection_name, then
        restore the given .bson file into it via the mongorestore
        command line tool. The number of items that the mongo shell
        reports afterwards is stored in self.numMongoItems.

        :param bsonFilename: path to the .bson file to load
        :type bsonFilename: String
        '''
        mongoclient = MongoClient()
        db = mongoclient[self.mongo_database_name]

        # Get collection object:
        collection = db[self.collection_name]

        # Clear out any old forum entries:
        self.logInfo('Preparing to delete the collection ')
        collection.remove()
        self.logInfo('Deleting mongo collection completed. Will now attempt a mongo restore')

        self.logInfo('Spawning subprocess to execute mongo restore')
        # NOTE(review): mode 'w' truncates self.logFilePath; if that is
        # also the file the logger writes to, earlier entries are lost.
        # Confirm this is intended.
        with open(self.logFilePath, 'w') as outfile:
            ret = subprocess.call(
                ['mongorestore',
                 '--drop',
                 '--db', self.mongo_database_name,
                 '--collection', self.collection_name,
                 bsonFilename],
                stdout=outfile, stderr=outfile)

            self.logDebug('Return value from mongorestore is %s' % (ret))

            objCount = subprocess.check_output(
                ['mongo',
                 '--quiet',
                 '--eval',
                 'printjson(db.contents.count())',
                 self.mongo_database_name,
                 ],
                stderr=outfile)
            self.numMongoItems = objCount

            self.logInfo('Available Forum posts %s' % objCount)

    def forumMongoToRelational(self, mongodb, mysqlDbObj, mysqlTable):
        '''
        Given a MongoDB wrapper through which Forum posts can be
        queried, and a MySQL db object and table name, anonymize
        each mongo record, and insert it into the MySQL table.

        :param mongodb: object whose query({}) method yields the Forum post records
        :type mongodb: MongoDB
        :param mysqlDbObj: wrapper to MySQL db. See pymysql_utils.py
        :type mysqlDbObj: MYSQLDB
        :param mysqlTable: name of table where posts are to be deposited.
            Example: 'contents'.
        :type mysqlTable: String
        '''
        self.logInfo('Will start inserting from mongo collection to MySQL')

        for mongoForumRec in mongodb.query({}):
            mongoRecordObj = MongoRecord(mongoForumRec)

            try:
                # Check whether 'up' can be converted to a list
                list(mongoRecordObj['up'])
            except Exception as e:
                self.logInfo("Error in conversion of 'up' field to a list (setting cell to -1):" + repr(e))
                mongoRecordObj['up'] = '-1'

            # Make sure the MongoDB object has all fields that will
            # be needed for the forum schema:
            self.ensureSchemaAdherence(mongoRecordObj)

            self.insert_content_record(mysqlDbObj, mysqlTable, mongoRecordObj)

    def prepDatabase(self):
        '''
        Declare variables and execute statements preparing the database to
        configure options - e.g.: setting char set to utf, connection type to utf
        truncating the already existing table.

        Exits the process with status 1 on a MySQLdb.Error.
        '''
        try:
            self.logDebug("Setting and assigning char set for mysqld. will truncate old values")
            self.mydb.execute('SET NAMES utf8;')
            self.mydb.execute('SET CHARACTER SET utf8;')
            self.mydb.execute('SET character_set_connection=utf8;')

            # Compose fully qualified table name from the db name to
            # which self.mydb is connected, and the forum table name
            # that was established in __init__():
            fullTblName = self.mydb.dbName() + '.' + self.forumTableName
            # Clear old forum data out of the table:
            try:
                self.mydb.dropTable(fullTblName)
                # Create MySQL table for the posts; which of the
                # 'screen_name'/'anon_screen_name' columns is kept
                # depends on self.anonymize (see createForumTable()):
                self.createForumTable(self.anonymize)
                self.logDebug("setting and assigning char set complete. Truncation succeeded")
            except ValueError as e:
                self.logDebug("Failed either to set character codes, or to create forum table %s: %s" % (fullTblName, repr(e)))

        except MySQLdb.Error as e:
            self.logInfo("MySql Error exiting %d: %s" % (e.args[0], e.args[1]))
            sys.exit(1)
def testBadParameters(self):
    '''
    Verify that MySQLDB() rejects None values and wrongly typed
    values for its connection parameters, and that the error
    message names the offending parameter(s).

    Each case is checked with assertRaises() followed by assertIn(),
    so a failure reports either that no exception was raised, or the
    expected and the actual message text.
    '''
    self.mysqldb.close()

    # Test setting parameters illegally to None:
    none_cases = [
        (dict(host=None, port=3306, user='******', db='unittest'),
         "None value(s) for ['host']; none of host,port,user,passwd or db must be None"),
        (dict(host='localhost', port=None, user='******', db='unittest'),
         "None value(s) for ['port']; none of host,port,user,passwd or db must be None"),
        (dict(host='localhost', port=3306, user=None, db='unittest'),
         "None value(s) for ['user']; none of host,port,user,passwd or db must be None"),
        (dict(host='localhost', port=3306, user='******', db=None),
         "None value(s) for ['db']; none of host,port,user,passwd or db must be None"),
        (dict(host='localhost', port=3306, user='******', passwd=None, db='unittest'),
         "None value(s) for ['passwd']; none of host,port,user,passwd or db must be None"),
        (dict(host=None, port=3306, user=None, db=None),
         "None value(s) for ['host', 'db', 'user']; none of host,port,user,passwd or db must be None"),
    ]

    # Check data types of parameters:
    type_cases = [
        # One illegal type: integer instead of string for host:
        (dict(host=10, port=3306, user='******', db='myDb'),
         "Value(s) ['host'] have bad type;host,user,passwd, and db must be strings; port must be int."),
        # Two illegal types: integers instead of strings for host and user:
        (dict(host=10, port=3306, user=30, db='myDb'),
         "Value(s) ['host', 'user'] have bad type;host,user,passwd, and db must be strings; port must be int."),
        # Port being string instead of required int:
        (dict(host='myHost', port='3306', user='******', db='myDb'),
         "Port must be an integer; was"),
    ]

    for (kwargs, expected_msg) in none_cases + type_cases:
        with self.assertRaises(Exception) as context:
            MySQLDB(**kwargs)
        self.assertIn(expected_msg, str(context.exception))
class ExtToAnonTableMaker(object):
    '''
    Given a CSV file of external user ids, prints a CSV table
    'ext_id,anon_screen_name' to stdout. All work is done during
    instantiation.
    '''

    def __init__(self, extIdsFileName):
        '''
        Connect to MySQL db 'Misc', load the external ids, look up the
        matching screen names, and print the ext_id to anon_screen_name
        table.

        :param extIdsFileName: path to CSV file with the external ids;
            its first line is skipped when loading.
        :type extIdsFileName: String
        '''
        user = '******'
        # Try to find pwd in the current user's $HOME/.ssh/mysql_root
        currUserHomeDir = os.getenv('HOME')
        if currUserHomeDir is None:
            pwd = None
        else:
            try:
                # Need to access MySQL db as its 'root':
                with open(os.path.join(currUserHomeDir, '.ssh/mysql_root')) as fd:
                    pwd = fd.readline().strip()
                # Switch user to 'root' b/c from now on it will need to be root:
                user = '******'
            except IOError:
                # No .ssh subdir of user's home, or no mysql_root inside .ssh:
                pwd = None

        self.db = MySQLDB(user=user, passwd=pwd, db='Misc')

        self.makeTmpExtsTable()
        self.loadExtIds(extIdsFileName)
        outfile = tempfile.NamedTemporaryFile(prefix='extsIntsScreenNames', suffix='.csv', delete=True)
        # Need to close this file, and thereby delete it,
        # so that MySQL is willing to write to it. Yes,
        # that's a race condition. But this is an
        # admin script, run by one person:
        outfile.close()
        self.findScreenNames(outfile.name)
        self.computeAnonFromScreenNames(outfile.name)

    def makeTmpExtsTable(self):
        '''
        Create a temporary, randomly named MySQL table with a single
        ext_id column. The name is kept in self.externalsTblNm.
        '''
        # Create table to load the CSV file into:
        self.externalsTblNm = self.idGenerator(prefix='ExternalsTbl_')
        mysqlCmd = 'CREATE TEMPORARY TABLE %s (ext_id varchar(32));' % self.externalsTblNm
        self.db.execute(mysqlCmd)

    def loadExtIds(self, csvExtsFileName):
        '''
        Load the external ids from the given CSV file into the
        temporary table created by makeTmpExtsTable().

        :param csvExtsFileName: path to CSV file with the external ids
        :type csvExtsFileName: String
        '''
        # Clean up line endings in the extIds file.
        # Between Win, MySQL, Mac, and R, we get
        # linefeeds and crs:
        cleanExtsFile = tempfile.NamedTemporaryFile(prefix='cleanExts', suffix='.csv', delete=False)
        try:
            try:
                os.chmod(cleanExtsFile.name, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
                with open(csvExtsFileName, 'r') as rawExtsFd:
                    for line in rawExtsFd:
                        cleanExtsFile.write(line.strip() + '\n')
            finally:
                cleanExtsFile.close()

            mysqlCmd = "LOAD DATA INFILE '%s' " % cleanExtsFile.name +\
                       'INTO TABLE %s ' % self.externalsTblNm +\
                       "FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
            self.db.execute(mysqlCmd)
        finally:
            # Delete the cleaned-exts file, even if cleaning
            # or loading failed:
            os.remove(cleanExtsFile.name)

    def findScreenNames(self, outCSVFileName):
        '''
        Have MySQL write a CSV file with header line
        'ext_id','user_int_id','screen_name', followed by one line
        per external id in the temporary table.

        :param outCSVFileName: path of the file that MySQL is to create
        :type outCSVFileName: String
        '''
        # Note the blank after the header SELECT: without it
        # 'screen_name' and UNION run together in the statement.
        mysqlCmd = (
            "SELECT 'ext_id','user_int_id','screen_name' "
            "UNION "
            "SELECT ext_id,"
            " user_int_id,"
            " username "
            " INTO OUTFILE '%s'"
            " FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' LINES TERMINATED BY '\n'"
            " FROM "
            " (SELECT ext_id,"
            " user_id AS user_int_id "
            " FROM %s LEFT JOIN edxprod.student_anonymoususerid "
            " ON %s.ext_id = edxprod.student_anonymoususerid.anonymous_user_id "
            " ) AS ExtAndInts "
            " LEFT JOIN edxprod.auth_user "
            " ON edxprod.auth_user.id = ExtAndInts.user_int_id;"
        ) % (outCSVFileName, self.externalsTblNm, self.externalsTblNm)
        self.db.execute(mysqlCmd)

    def computeAnonFromScreenNames(self, extIntNameFileName):
        '''
        Read the CSV file written by findScreenNames(), and print
        one 'ext_id,anon_screen_name' line per data row to stdout.
        Rows whose screen name is MySQL's NULL marker (\\N) get
        'NULL' in place of the hash.

        :param extIntNameFileName: path to the ext_id,user_int_id,screen_name CSV file
        :type extIntNameFileName: String
        '''
        with open(extIntNameFileName, 'r') as inFd:
            print('ext_id,anon_screen_name')
            firstLineDiscarded = False
            for line in inFd:
                (extId, intId, screenName) = line.split(',')  # @UnusedVariable
                if not firstLineDiscarded:
                    # Skip the column header line:
                    firstLineDiscarded = True
                    continue
                screenName = screenName.strip().strip('"')
                if screenName == '\\N':
                    print('%s,%s' % (extId.strip('"'), 'NULL'))
                else:
                    print('%s,%s' % (extId.strip('"'), EdXTrackLogJSONParser.makeHash(screenName)))

    def idGenerator(self, prefix='', size=6, chars=string.ascii_uppercase + string.digits):
        '''
        Return prefix followed by size characters drawn at random
        from chars.

        :param prefix: string to place in front of the random part
        :type prefix: String
        :param size: number of random characters
        :type size: int
        :param chars: characters to draw from
        :type chars: String
        :rtype: String
        '''
        randPart = ''.join(random.choice(chars) for _ in range(size))
        return prefix + randPart
class UserCountryTableCreator(object):
    '''
    (Re)creates table UserCountry in MySQL db 'Edx', and fills it
    with one row per distinct (anon_screen_name, ip_country) pair
    found in table EventXtract, with the country code expanded to
    two-letter code, three-letter code, and country name.
    '''

    DEST_TABLE = 'UserCountry'
    # Number of anon ids-country-2-letter-3-letter
    # tuples to accumulate before inserting into
    # UserCountry:
    INSERT_BULK_SIZE = 15000

    def __init__(self, user, pwd):
        '''
        Connect to MySQL db 'Edx', then drop and re-create an empty
        UserCountry table.

        :param user: MySQL user to log in as
        :type user: String
        :param pwd: MySQL password of that user
        :type pwd: String
        '''
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        # Start from scratch: any existing UserCountry table
        # is dropped, and an empty one created:
        createCmd = '''CREATE TABLE UserCountry (
                         anon_screen_name varchar(40) NOT NULL DEFAULT "",
                         two_letter_country varchar(2) NOT NULL DEFAULT "",
                         three_letter_country varchar(3) NOT NULL DEFAULT "",
                         country varchar(255) NOT NULL DEFAULT ""
                         ) ENGINE=MyISAM;
                         '''
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        print("Creating table UserCountry...")
        self.db.execute(createCmd)
        print("Done creating table UserCountry.")

    def fillTable(self):
        '''
        Look up the country for each distinct
        (anon_screen_name, ip_country) pair in EventXtract, and
        insert the results into UserCountry in batches of
        INSERT_BULK_SIZE rows. Country codes that cannot be
        translated are entered as 'XX'/'XXX'/'Not in lookup tbl'.

        Exits the process with status 1 if a bulk insert reports errors.
        '''
        query = "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"
        query_res_it = self.db.query(query)
        done = False
        # Order of columns for insert:
        colNameTuple = ('anon_screen_name', 'two_letter_country', 'three_letter_country', 'country')

        while not done:
            values = []
            print("%s: Starting one set of %s lookups..." %
                  (str(datetime.datetime.today()),
                   UserCountryTableCreator.INSERT_BULK_SIZE))
            for _ in range(UserCountryTableCreator.INSERT_BULK_SIZE):
                try:
                    (anon_screen_name, ip3LetterCountry) = query_res_it.next()
                except StopIteration:
                    done = True
                    break
                # Try translating:
                try:
                    (twoLetterCode, threeLetterCode, country) = self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
                except (ValueError, TypeError, KeyError):
                    twoLetterCode = 'XX'
                    threeLetterCode = 'XXX'
                    country = 'Not in lookup tbl'
                values.append(('%s' % anon_screen_name,
                               '%s' % twoLetterCode,
                               '%s' % threeLetterCode,
                               '%s' % country))

            if not values:
                # Query result was empty, or its length was an exact
                # multiple of INSERT_BULK_SIZE: nothing left to insert.
                continue

            # Insert this chunk into the UserCountry table
            print("%s: Inserting %s rows into UserCountry table..." % (str(datetime.datetime.today()), len(values)))
            (errors, insertWarnings) = self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple, values)
            if errors is not None:
                print('Error(s) during UserCountry insert: %s' % errors)
                sys.exit(1)
            if insertWarnings is not None:
                print('Warning(s) during UserCountry insert: %s' % insertWarnings)

            print("%s: Done inserting %s rows into UserCountry table..." % (str(datetime.datetime.today()), len(values)))
            # ... and loop to process the next INSERT_BULK_SIZE batch

    def makeIndex(self):
        '''
        Create indexes on UserCountry's anon_screen_name and
        three_letter_country columns via the createIndexIfNotExists
        stored procedure.
        '''
        self.db.execute("CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);")
        self.db.execute("CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);")

    def close(self):
        '''
        Close the MySQL connection.
        '''
        self.db.close()
class ExportClassTest(unittest.TestCase):
    """Tests of CourseCSVServer exports against a local 'unittest' MySQL database.

    Each test loads one of the canned data sets below into support tables
    (see buildSupportTables()), sends a JSON request to the server's
    on_message() method, and compares the files the server reports having
    written against hard-coded expected lines.
    """

    # Test data for one student in one class. Student is active in 2 of the
    # class' weeks:
    #
    # Week 4:
    #    Session1: 15        total each week:  Week4: 20
    #    Session2:  5                          Week6: 72
    #
    # Week 6:
    #    Session3: 15
    #    Session4: 42
    #    Session5: 15
    #            ------------
    #              92
    #
    # Sessions in weeks:
    #    week4: [20] ==> median = 20
    #    week6: [15,42,15] ==> median = 15
    #
    # NOTE: the exported weekly effort file numbers these two weeks
    # 5 and 7 (see the assertions in the tests below); the "week 4" /
    # "week 6" labels used in these comments appear to be the same
    # weeks counted from zero.
    #
    # The engagement summary file for one class:
    # totalStudentSessions, totalEffortAllStudents, oneToTwentyMin, twentyoneToSixtyMin, greaterSixtyMin
    #           5                   92                    2                 0                   0
    #
    # The all_data detail file resulting from the data:
    # Platform,Course,Student,Date,Time,SessionLength
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15
    #
    # The weekly effort file from the data:
    # platform,course,student,week,effortMinutes
    #    'OpenEdX,CME/MedStats/2013-2015,abc,5,20
    #    'OpenEdX,CME/MedStats/2013-2015,abc,7,72

    oneStudentTestData = [
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 03:27:00", 0),  # week 4; start session
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-08-30 03:27:20", 1),  # 20sec (gets rounded to 0min)
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-08-30 03:37:00", 0),  # 9min:40sec (gets rounded to 10min)
        # 0min + 10min + 5min = 15min
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 04:10:00", 0),  # 5min
        (
            "CME/MedStats/2013-2015",
            "abc",
            "load_video",
            "2013-09-14 03:27:24",
            1,
        ),  # week 6; 15min (for the single video)
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 03:27:25", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-09-15 03:30:35", 0),  # 3min
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-15 03:59:00", 1),  # 28min
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:05:00", 0),  # 6min
        # 3min + 28min + 6min + 5min = 42
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:36:54", 1),  # 15
    ]

    courseRuntimesData = [
        ("CME/MedStats/2013-2015", "2013-07-30 03:27:00", "2013-10-30 03:27:00"),
        ("My/RealCourse/2013-2015", "2013-09-01 03:27:00", "2013-10-30 03:27:00"),
    ]

    userGradeData = [
        (10, "CME/MedStats/2013-2015", "abc"),
        (20, "My/RealCourse/Summer2014", "def"),
        (30, "CME/MedStats/2013-2015", "def"),
    ]

    demographicsData = [("abc", "f", 1988, "hs", "USA", "United States"), ("def", "m", 1990, "p", "FRG", "Germany")]

    true_courseenrollmentData = [
        (10, "CME/MedStats/2013-2015", "2013-08-30 03:27:00", "nomode"),
        (30, "CME/MedStats/2013-2015", "2014-08-30 03:27:00", "yesmode"),
    ]

    # NOTE(review): the end date "2014-11-31" is not a valid calendar
    # date (November has 30 days); how MySQL stores it depends on the
    # server's SQL mode. Confirm whether this is intentional.
    courseInfoData = [
        ("CME/MedStats/2013-2015", "medStats", 2014, "fall", 1, 0, "2014-08-01", "2014-09-01", "2014-11-31")
    ]

    userCountryData = [("US", "USA", "abc", "United States"), ("DE", "DEU", "def", "Germany")]

    twoStudentsOneClassTestData = [
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 03:27:00", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-08-30 03:27:20", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-08-30 03:37:00", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 04:10:00", 0),
        ("CME/MedStats/2013-2015", "def", "page_close", "2013-08-30 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-14 03:27:24", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 03:27:25", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-09-15 03:30:35", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-15 03:59:00", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:05:00", 0),
        ("CME/MedStats/2013-2015", "def", "page_close", "2013-09-16 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:36:54", 1),
    ]

    twoStudentsTwoClassesTestData = [
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 03:27:00", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-08-30 03:27:20", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-08-30 03:37:00", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 04:10:00", 0),
        ("My/RealCourse/2013-2015", "def", "page_close", "2013-09-01 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-14 03:27:24", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 03:27:25", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-09-15 03:30:35", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-15 03:59:00", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:05:00", 0),
        ("My/RealCourse/2013-2015", "def", "page_close", "2013-09-16 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:36:54", 1),
    ]

    def setUp(self):
        """Create the server under test and connect to the unittest database."""
        application = None
        request = None  # HTTPRequest.HTTPRequest()
        self.courseServer = CourseCSVServer(application, request, testing=True)
        try:
            self.mysqldb = MySQLDB(host="localhost", port=3306, user="******", db="unittest")
        except ValueError as e:
            self.fail(
                str(e)
                + " (For unit testing, localhost MySQL server must have user 'unittest' without password, and a database called 'unittest')"
            )

    def tearDown(self):
        """Drop the Activities table and close the connection, best effort."""
        # Cleanup must never mask the outcome of the test itself, so
        # failures are swallowed -- but only ordinary exceptions, not
        # KeyboardInterrupt/SystemExit, and the connection is closed
        # even when dropping the table fails.
        try:
            self.mysqldb.dropTable("unittest.Activities")
        except Exception:
            pass
        finally:
            try:
                self.mysqldb.close()
            except Exception:
                pass

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testOneStudentOneClass(self):
        """Engagement export for one student in one class."""
        self.buildSupportTables(TestSet.ONE_STUDENT_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "CME/MedStats/2013-2015", "engagementData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        with open(self.courseServer.latestResultSummaryFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseSummaryLine)
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,5,92,2,0,0\n", fd.readline())
        with open(self.courseServer.latestResultDetailFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15\n", fd.readline())
        with open(self.courseServer.latestResultWeeklyEffortFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseWeeklyLine)
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,5,20\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,7,72\n", fd.readline())
        os.remove(self.courseServer.latestResultSummaryFilename)
        os.remove(self.courseServer.latestResultDetailFilename)
        os.remove(self.courseServer.latestResultWeeklyEffortFilename)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testTwoStudentsOneClass(self):
        """Engagement export for two students in the same class."""
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "CME/MedStats/2013-2015", "engagementData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        with open(self.courseServer.latestResultSummaryFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseSummaryLine)
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,7,122,4,0,0\n", fd.readline())
        with open(self.courseServer.latestResultDetailFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,def,2013-08-30,04:10:00,15\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,def,2013-09-16,04:10:00,15\n", fd.readline())
        with open(self.courseServer.latestResultWeeklyEffortFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseWeeklyLine)
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,5,20\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,7,72\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,def,5,15\n", fd.readline())
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,def,7,15\n", fd.readline())
        os.remove(self.courseServer.latestResultSummaryFilename)
        os.remove(self.courseServer.latestResultDetailFilename)
        os.remove(self.courseServer.latestResultWeeklyEffortFilename)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testTwoStudentsTwoClasses(self):
        """Engagement export across all courses: two students, two classes."""
        self.buildSupportTables(TestSet.TWO_STUDENTS_TWO_CLASSES)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "None", "engagementData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        with open(self.courseServer.latestResultSummaryFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseSummaryLine)
            # Read the rest of the summary lines, and
            # sort them just to ensure that we compare each
            # line to its ground truth:
            allSummaryLines = fd.readlines()
            allSummaryLines.sort()
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,5,92,2,0,0\n", allSummaryLines[0])
            self.assertEqual("OpenEdX,My/RealCourse/2013-2015,2,30,2,0,0\n", allSummaryLines[1])
        with open(self.courseServer.latestResultDetailFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            allDetailLines = fd.readlines()
            allDetailLines.sort()
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15\n", allDetailLines[0])
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5\n", allDetailLines[1])
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15\n", allDetailLines[2])
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42\n", allDetailLines[3])
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15\n", allDetailLines[4])
            self.assertEqual("OpenEdX,My/RealCourse/2013-2015,def,2013-09-01,04:10:00,15\n", allDetailLines[5])
            self.assertEqual("OpenEdX,My/RealCourse/2013-2015,def,2013-09-16,04:10:00,15\n", allDetailLines[6])
        with open(self.courseServer.latestResultWeeklyEffortFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseWeeklyLine)
            allWeeklyLines = fd.readlines()
            allWeeklyLines.sort()
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,5,20\n", allWeeklyLines[0])
            self.assertEqual("OpenEdX,CME/MedStats/2013-2015,abc,7,72\n", allWeeklyLines[1])
            self.assertEqual("OpenEdX,My/RealCourse/2013-2015,def,1,15\n", allWeeklyLines[2])
            self.assertEqual("OpenEdX,My/RealCourse/2013-2015,def,3,15\n", allWeeklyLines[3])
        os.remove(self.courseServer.latestResultSummaryFilename)
        os.remove(self.courseServer.latestResultDetailFilename)
        os.remove(self.courseServer.latestResultWeeklyEffortFilename)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testForumIsolated(self):
        """Forum export with "relatable" set to "False"."""
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "MITx/6.002x/2012_Fall", "forumData" : "True", "wipeExisting" : "True", "relatable" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        zipObj = zipfile.ZipFile(self.courseServer.latestForumFilename, "r")
        forumFd = zipObj.open("MITx_6.002x_2012_Fall_Forum.csv", "r", "foobar")
        forumExportHeader = (
            "'forum_post_id','anon_screen_name','type','anonymous',"
            + "'anonymous_to_peers','at_position_list','forum_int_id','body',"
            + "'course_display_name','created_at','votes','count','down_count',"
            + "'up_count','up','down','comment_thread_id','parent_id','parent_ids',"
            + "'sk','confusion','happiness'\n"
        )
        forum1stLine = "\"519461545924670200000001\",\"<anon_screen_name_redacted>\",\"CommentThread\",\"False\",\"False\",\"[]\",11,\"First forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': -6, 'down_count': 8, 'up': ['2', '10'], 'down': ['1', '3', '4', '5', '6', '7', '8', '9'], 'up_count': 2}\",10,8,2,\"['2', '10']\",\"['1', '3', '4', '5', '6', '7', '8', '9']\",\"None\",\"None\",\"None\",\"None\",\"none\",\"none\""
        forum2ndLine = "\"519461545924670200000005\",\"<anon_screen_name_redacted>\",\"Comment\",\"False\",\"False\",\"[]\",7,\"Second forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': 4, 'down_count': 3, 'up': ['1', '2', '5', '6', '7', '8', '9'], 'down': ['3', '4', '10'], 'up_count': 7}\",10,3,7,\"['1', '2', '5', '6', '7', '8', '9']\",\"['3', '4', '10']\",\"519461545924670200000001\",\"None\",\"[]\",\"519461545924670200000005\",\"none\",\"none\""
        header = forumFd.readline()
        self.assertEqual(forumExportHeader, header)
        self.assertEqual(forum1stLine, forumFd.readline().strip())
        self.assertEqual(forum2ndLine, forumFd.readline().strip())

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testForumRelatable(self):
        """Forum export with "relatable" set to "True"."""
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "MITx/6.002x/2012_Fall", "forumData" : "True", "wipeExisting" : "True", "relatable" : "True", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        zipObj = zipfile.ZipFile(self.courseServer.latestForumFilename, "r")
        forumFd = zipObj.open("MITx_6.002x_2012_Fall_Forum.csv", "r", "foobar")
        forumExportHeader = (
            "'forum_post_id','anon_screen_name','type','anonymous',"
            + "'anonymous_to_peers','at_position_list','forum_int_id','body',"
            + "'course_display_name','created_at','votes','count','down_count',"
            + "'up_count','up','down','comment_thread_id','parent_id','parent_ids',"
            + "'sk','confusion','happiness'\n"
        )
        forum1stLine = "\"519461545924670200000001\",\"e07a3da71f0330452a6aa650ed598e2911301491\",\"CommentThread\",\"False\",\"False\",\"[]\",0,\"First forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': -6, 'down_count': 8, 'up': ['2', '10'], 'down': ['1', '3', '4', '5', '6', '7', '8', '9'], 'up_count': 2}\",10,8,2,\"['2', '10']\",\"['1', '3', '4', '5', '6', '7', '8', '9']\",\"None\",\"None\",\"None\",\"None\",\"none\",\"none\""
        forum2ndLine = "\"519461545924670200000005\",\"e07a3da71f0330452a6aa650ed598e2911301491\",\"Comment\",\"False\",\"False\",\"[]\",0,\"Second forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': 4, 'down_count': 3, 'up': ['1', '2', '5', '6', '7', '8', '9'], 'down': ['3', '4', '10'], 'up_count': 7}\",10,3,7,\"['1', '2', '5', '6', '7', '8', '9']\",\"['3', '4', '10']\",\"519461545924670200000001\",\"None\",\"[]\",\"519461545924670200000005\",\"none\",\"none\""
        header = forumFd.readline()
        self.assertEqual(forumExportHeader, header)
        self.assertEqual(forum1stLine, forumFd.readline().strip())
        self.assertEqual(forum2ndLine, forumFd.readline().strip())

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testForumIsolatedCourseNotInForum(self):
        """Forum export request for a course id that has no forum entries."""
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "Course/Not/Exists", "forumData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        # NOTE(review): the result of this check is discarded, so the
        # test currently only verifies that on_message() does not raise
        # and that latestForumFilename is set. Decide whether the file
        # is expected to exist and wrap this in assertTrue/assertFalse.
        os.path.exists(self.courseServer.latestForumFilename)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testDemographics(self):
        """Demographics export: one line per learner in demographicsData."""
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "testtest/MedStats/2013-2015", "demographics" : "True", "wipeExisting" : "True", "relatable" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        # Allow result to be computed:
        time.sleep(3)
        with open(self.courseServer.latestDemographicsFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseSummaryLine)
            # Read the rest of the summary lines, and
            # sort them just to ensure that we compare each
            # line to its ground truth:
            allDemographicsLines = fd.readlines()
            allDemographicsLines.sort()
            # abc,f,1988,hs,USA,United States
            self.assertEqual('"abc","f","1988","hs","USA","United States"', allDemographicsLines[0].strip())
            self.assertEqual('"def","m","1990","p","FRG","Germany"', allDemographicsLines[1].strip())
        os.remove(self.courseServer.latestDemographicsFilename)

    # ******@unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testQuarterlyDemographics(self):
        """Quarterly-report demographics export for fall 2014."""
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "testtest/MedStats/2013-2015", "quarterRep": "True", "quarterRepDemographics" : "True", "quarterRepQuarter" : "fall", "quarterRepYear": "2014", "wipeExisting" : "True", "relatable" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        # Allow result to be computed:
        time.sleep(3)
        with open(self.courseServer.latestQuarterlyDemographicsFilename, "r") as fd:
            # Read and discard the csv file's header line:
            fd.readline()
            # print(courseSummaryLine)
            # Read the rest of the summary lines; they are
            # deliberately not sorted here:
            allDemographicsLines = fd.readlines()
            # allDemographicsLines.sort()
            self.assertEqual(
                "openedx,CME/MedStats/2013-2015,1,1,0,0,2,1,0,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0",
                allDemographicsLines[0].strip(),
            )
        # Remove the file this test actually read (the quarterly one,
        # not the plain demographics export):
        os.remove(self.courseServer.latestQuarterlyDemographicsFilename)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testZipFiles(self):
        """Zip two temp files with a password and extract them again."""
        file1 = tempfile.NamedTemporaryFile()
        file2 = tempfile.NamedTemporaryFile()
        file1.write("foo")
        file2.write("bar")
        file1.flush()
        file2.flush()
        self.courseServer.zipFiles("/tmp/zipFileUnittest.zip", "foobar", [file1.name, file2.name])
        # Read it all back:
        zipfile.ZipFile("/tmp/zipFileUnittest.zip").extractall(pwd="foobar")

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testExportPIIDetails(self):
        pass

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testLearnerPerformance(self):
        pass

    def buildSupportTables(self, testSetToLoad):
        """Drop, re-create and fill every table the export tests rely on.

        :param testSetToLoad: one of the TestSet members selecting which
            canned activity data goes into the Activities table
        :raises ValueError: if testSetToLoad is not a known test set
        """
        # Activities table:
        schema = OrderedDict(
            [
                ("course_display_name", "varchar(255)"),
                ("anon_screen_name", "varchar(40)"),
                ("event_type", "varchar(120)"),
                ("time", "datetime"),
                ("isVideo", "TINYINT"),
            ]
        )
        self.mysqldb.dropTable("unittest.Activities")
        self.mysqldb.createTable("unittest.Activities", schema)
        colNames = ["course_display_name", "anon_screen_name", "event_type", "time", "isVideo"]
        if testSetToLoad == TestSet.ONE_STUDENT_ONE_CLASS:
            colValues = ExportClassTest.oneStudentTestData
        elif testSetToLoad == TestSet.TWO_STUDENTS_ONE_CLASS:
            colValues = ExportClassTest.twoStudentsOneClassTestData
        elif testSetToLoad == TestSet.TWO_STUDENTS_TWO_CLASSES:
            colValues = ExportClassTest.twoStudentsTwoClassesTestData
        else:
            raise ValueError("Requested test set unavailable: %s" % testSetToLoad)
        self.mysqldb.bulkInsert("Activities", colNames, colValues)

        # Course runtimes:
        schema = OrderedDict(
            [
                ("course_display_name", "varchar(255)"),
                ("course_start_date", "datetime"),
                ("course_end_date", "datetime"),
            ]
        )
        self.mysqldb.dropTable("unittest.CourseRuntimes")
        self.mysqldb.createTable("unittest.CourseRuntimes", schema)
        colNames = ["course_display_name", "course_start_date", "course_end_date"]
        colValues = ExportClassTest.courseRuntimesData
        self.mysqldb.bulkInsert("CourseRuntimes", colNames, colValues)

        # UserGrade:
        schema = OrderedDict(
            [("user_int_id", "int"), ("course_id", "varchar(255)"), ("anon_screen_name", "varchar(40)")]
        )
        self.mysqldb.dropTable("unittest.UserGrade")
        self.mysqldb.createTable("unittest.UserGrade", schema)
        colNames = ["user_int_id", "course_id", "anon_screen_name"]
        colValues = ExportClassTest.userGradeData
        self.mysqldb.bulkInsert("UserGrade", colNames, colValues)

        # true_courseenrollment
        schema = OrderedDict(
            [
                ("user_id", "int"),
                ("course_display_name", "varchar(255)"),
                ("created", "datetime"),
                ("mode", "varchar(10)"),
            ]
        )
        self.mysqldb.dropTable("unittest.true_courseenrollment")
        self.mysqldb.createTable("unittest.true_courseenrollment", schema)
        colNames = ["user_id", "course_display_name", "created", "mode"]
        colValues = ExportClassTest.true_courseenrollmentData
        self.mysqldb.bulkInsert("unittest.true_courseenrollment", colNames, colValues)

        # UserCountry:
        schema = OrderedDict(
            [
                ("two_letter_country", "varchar(2)"),
                ("three_letter_country", "varchar(3)"),
                ("anon_screen_name", "varchar(40)"),
                ("country", "varchar(255)"),
            ]
        )
        self.mysqldb.dropTable("unittest.UserCountry")
        self.mysqldb.createTable("unittest.UserCountry", schema)
        colNames = ["two_letter_country", "three_letter_country", "anon_screen_name", "country"]
        colValues = ExportClassTest.userCountryData
        self.mysqldb.bulkInsert("unittest.UserCountry", colNames, colValues)

        # Demographics
        schema = OrderedDict(
            [
                ("anon_screen_name", "varchar(40)"),
                ("gender", "varchar(255)"),
                ("year_of_birth", "int(11)"),
                ("level_of_education", "varchar(42)"),
                ("country_three_letters", "varchar(3)"),
                ("country_name", "varchar(255)"),
            ]
        )
        # Demographics may exist either as a table or as a view;
        # get rid of both before re-creating it as a table:
        self.mysqldb.dropTable("unittest.Demographics")
        self.mysqldb.execute("DROP VIEW IF EXISTS unittest.Demographics")
        self.mysqldb.createTable("unittest.Demographics", schema)
        colNames = [
            "anon_screen_name",
            "gender",
            "year_of_birth",
            "level_of_education",
            "country_three_letters",
            "country_name",
        ]
        colValues = ExportClassTest.demographicsData
        self.mysqldb.bulkInsert("unittest.Demographics", colNames, colValues)

        # Quarterly Report Demographics:
        # CourseInfo:
        schema = OrderedDict(
            [
                ("course_display_name", "varchar(255)"),
                ("course_catalog_name", "varchar(255)"),
                ("academic_year", "int"),
                ("quarter", "varchar(7)"),
                ("num_quarters", "int"),
                ("is_internal", "tinyint"),
                ("enrollment_start", "datetime"),
                ("start_date", "datetime"),
                ("end_date", "datetime"),
            ]
        )
        self.mysqldb.dropTable("unittest.CourseInfo")
        self.mysqldb.createTable("unittest.CourseInfo", schema)
        colNames = [
            "course_display_name",
            "course_catalog_name",
            "academic_year",
            "quarter",
            "num_quarters",
            "is_internal",
            "enrollment_start",
            "start_date",
            "end_date",
        ]
        colValues = ExportClassTest.courseInfoData
        self.mysqldb.bulkInsert("unittest.CourseInfo", colNames, colValues)

        # Forum table:
        # This table gets loaded via a .sql file imported into mysql.
        # That file drops any existing unittest.contents, so we
        # don't do that here. The path is relative to the current
        # working directory.
        mysqlCmdFile = "data/forumTests.sql"
        mysqlLoadCmd = ["mysql", "-u", "unittest"]
        with open(mysqlCmdFile, "r") as theStdin:
            # Drop table unittest.contents, and load a fresh copy:
            subprocess.call(mysqlLoadCmd, stdin=theStdin)
class TestForumEtl(unittest.TestCase): # Forum rows have the following columns: # type, anonymous, anonymous_to_peers, at_position_list, user_int_id, body, course_display_name, created_at, votes, count, down_count, up_count, up, down, comment_thread_id, parent_id, parent_ids, sk # Correct result for relationization of tinyForum.json # (in <projDir>/src/forum_etl/data). This result is anonymized and not relatable, # i.e. poster name UIDs use integers, while other tables use hashes: tinyForumGoldAnonymized = \ [ # poster Otto van Homberg: body is clean to start with: ('anon_screen_name_redacted','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None), # poster Andreas Fritz: body has someone's email: ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'), # poster Otto van Homberg: body has 'Otto': ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'), # poster 
Andreas Fritz: body has a phone number: ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'), # poster Otto van Homberg: body has his screen name (otto_king): ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'), # poster Otto van Homberg: body has his full name (Otto van Homberg): ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007') ] # Gold result for anonymization that allows relating to other tables (i.e. 
hashes are constant) tinyForumGoldRelatable = \ [ # poster Otto van Homberg: body is clean to start with: ('abc','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None), # poster Andreas Fritz: body has someone's email: ('def','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'), # poster Otto van Homberg: body has 'Otto': ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'), # poster Andreas Fritz: body has a phone number: ('ghi','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'), # poster Otto van Homberg: body has his screen name (otto_king): ('abc','Comment', 'False', 'False', 
'[]', 5L, 'Body with poster screen name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'), # poster Otto van Homberg: body has his full name (Otto van Homberg): ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_abc> <nameRedac_abc> <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007') ] # Gold result for non-anonymized forum: tinyForumGoldClear = \ [ # poster Otto van Homberg: body is clean to start with: ('otto_king','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None), # poster Andreas Fritz: body has someone's email: ('fritzL','Comment', 'False', 'False', '[]', 7L, ' Body with [email protected] email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'), # poster Otto van Homberg: body has 'Otto': ('otto_king','Comment', 'False', 
'False', '[]', 5L, 'Body with poster name Otto embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'), # poster Andreas Fritz: body has a phone number: ('bebeW','Comment', 'False', 'False', '[]', 10L, 'Body with 650-333-4567 a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'), # poster Otto van Homberg: body has his screen name (otto_king): ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name otto_king embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'), # poster Otto van Homberg: body has his full name (Otto van Homberg): ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name Otto van Homberg embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007') ] def setUp(self): self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum") # Fill the little MongoDB with test JSON lines self.resetMongoTestDb() 
self.mysqldb = MySQLDB(user='******', db='unittest') # Start with an empty result MySQL table for each test: self.mysqldb.dropTable('contents') # Fill the fake UserGrade table with records of course participants: self.resetMySQLUserListDb() # Instantiate a Forum scrubber without the # name of a bson file that contains forum # records. That 'None' for the bson file will # make the class understand that it's being # instantiated for a unit test. self.forumScrubberAnonymized = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade') self.forumScrubberRelatable = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', allowAnonScreenName=True) self.forumScrubberClear = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', anonymize=False) def tearDown(self): self.mysqldb.close() @unittest.skipIf(not RUN_ALL_TESTS, 'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.') def testAnonymized(self): self.forumScrubberAnonymized.populateUserCache() self.forumScrubberAnonymized.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents') for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')): # print(str(rowNum) + ':' + str(forumPost)) self.assertEqual(TestForumEtl.tinyForumGoldAnonymized[rowNum], forumPost) @unittest.skipIf(not RUN_ALL_TESTS, 'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.') def testNonAnonymizedRelatable(self): self.forumScrubberRelatable.populateUserCache() self.forumScrubberRelatable.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents') for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')): # print(str(rowNum) + ':' + str(forumPost)) self.assertEqual(TestForumEtl.tinyForumGoldRelatable[rowNum], forumPost) 
@unittest.skipIf(not RUN_ALL_TESTS,
                 'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')
def testNonAnonymized(self):
    '''
    Run the scrubber created with anonymize=False over the Mongo
    fixture, and compare each row of unittest.contents with
    tinyForumGoldClear.
    '''
    self.forumScrubberClear.populateUserCache()
    self.forumScrubberClear.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')
    for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
        self.assertEqual(TestForumEtl.tinyForumGoldClear[rowNum], forumPost)

def resetMongoTestDb(self):
    '''
    Empty the test MongoDB collection, then reload it from
    data/tinyForum.json (one JSON document per line), located
    relative to this file.
    '''
    self.mongoDb.clearCollection()
    # Use small, known forum collection:
    currDir = os.path.dirname(__file__)
    with open(os.path.join(currDir, 'data/tinyForum.json'), 'r') as jsonFd:
        for line in jsonFd:
            # A blank line is not a JSON document; json.loads()
            # would raise on it, so skip it.
            if not line.strip():
                continue
            self.mongoDb.insert(json.loads(line))

def resetMySQLUserListDb(self):
    '''
    Prepare a MySQL table that mimicks EdxPrivate.UserGrade:
    drop any existing UserGrade table, recreate it, and insert
    three known course participants.
    '''
    # Build the OrderedDict from a sequence of pairs. Passing a
    # dict literal instead would hand the OrderedDict an already
    # unordered mapping on Python 2, making the column order of
    # the created table arbitrary.
    userGradeColSpecs = OrderedDict([
        ('name', 'varchar(255)'),
        ('screen_name', 'varchar(255)'),
        ('grade', 'int'),
        ('course_id', 'varchar(255)'),
        ('distinction', 'tinyint'),
        ('status', 'varchar(50)'),
        ('user_int_id', 'int(11)'),
        ('anon_screen_name', 'varchar(40)'),
    ])
    self.mysqldb.dropTable('UserGrade')
    self.mysqldb.createTable('UserGrade', userGradeColSpecs)
    # Column names for the insert come from the same ordered spec,
    # so schema and insert can never disagree:
    self.mysqldb.bulkInsert('UserGrade',
                            tuple(userGradeColSpecs.keys()),
                            [
                             ('Otto van Homberg','otto_king',5,'oldCourse',0,'notpassing',5,'abc'),
                             ('Andreas Fritz','fritzL',2,'newCourse',0,'notpassing',7,'def'),
                             ('Bebe Winter', 'bebeW',10,'History of Baking',1,'passing',10,'ghi')
                            ])
class CoursesGivenQuarter(object):
    '''
    Bus module that queries datastage for course information,
    given academic year, and quarter. Requests arrive as JSON
    messages on the bus; results are published back as an HTML
    table on topic 'course_listing'.
    '''

    MYSQL_PORT_LOCAL = 5555

    module_topic = 'course_listing'

    # Non-alphanumeric characters tolerated in request values that
    # get spliced into the SQL query. Quotes, backslashes and
    # semicolons are deliberately absent.
    _SAFE_SQL_PUNCTUATION = ' -_/.'

    def __init__(self, topic=None, user='******', passwd=''):
        '''
        Open the MySQL connection, subscribe to the bus topic, and
        then block forever waiting for messages. This constructor
        does not return.

        :param topic: topic to subscribe to. Defaults to
            CoursesGivenQuarter.module_topic.
        :type topic: {string | None}
        :param user: MySQL user name
        :type user: string
        :param passwd: MySQL password
        :type passwd: string
        '''
        if topic is None:
            topic = CoursesGivenQuarter.module_topic

        self.mysqldb = MySQLDB(host='127.0.0.1',
                               port=CoursesGivenQuarter.MYSQL_PORT_LOCAL,
                               user=user,
                               passwd=passwd,
                               db='Edx')

        # The bus expects a callable taking topicName, msgText, and
        # msgOffset. Wrap our callback method so that it can be
        # handed to the bus:
        self.requestDeliveryMethod = functools.partial(self.requestCoursesForQuarter)

        # Create a BusAdapter instance:
        self.bus = BusAdapter()

        # Ask for callbacks to self.requestDeliveryMethod whenever
        # a message arrives on the topic:
        self.bus.subscribeToTopic(topic, self.requestDeliveryMethod)

        # All work happens in the callback; just keep waiting.
        # NOTE(review): this waits on module_topic even when a
        # different 'topic' was subscribed above -- confirm intended.
        while True:
            self.bus.waitForMessage(CoursesGivenQuarter.module_topic)

    def requestCoursesForQuarter(self, topicName, msgText, msgOffset):
        '''
        This method is called whenever a message in topic
        'course_listing' is published by anyone on the bus.
        The msgText should have the JSON format:

            {'id'      : 'abcd',
             'type'    : 'req',
             'content' : {'academic_year' : '2014',
                          'quarter' : 'spring'},
             'time'    : '2015-05-27T18:12:22.706204'
            }

        Messages whose type is not 'req' are ignored silently
        (this module's own responses travel on the same topic).

        On success an HTML table is published with message type
        'resp' and the id of the request. On error an error string
        is published the same way; if the request carried no id,
        the id 'NULL' is used.

        :param topicName: name of topic to which the arriving msg belongs
        :type topicName: string
        :param msgText: text part of the message. JSON as specified above.
        :type msgText: string
        :param msgOffset: position of message in the topic's message history
        :type msgOffset: int
        '''
        try:
            # Import the message into a dict:
            msgDict = json.loads(msgText)
        except (ValueError, TypeError) as e:
            self.bus.logError('Received msg with invalid wrapping JSON: %s (%s)' %
                              (str(msgText), repr(e)))
            return

        # Must have a message id. TypeError covers JSON payloads
        # that are not objects (e.g. a list or a bare string):
        try:
            reqId = msgDict['id']
        except (KeyError, TypeError):
            self._rejectRequest('NULL',
                                "Error: message id not provided in an incoming request.",
                                "Message id not provided in %s" % str(msgDict))
            return

        # Must have a type, and only type 'req' is for us:
        try:
            reqKey = msgDict['type']
        except (KeyError, TypeError):
            self._rejectRequest(reqId,
                                "Error: message type not provided in %s" % str(msgDict),
                                'Received msg without a type field: %s' % str(msgText))
            return
        if reqKey != 'req':
            return

        # The content field should itself be a dict:
        try:
            contentDict = msgDict['content']
        except (KeyError, TypeError):
            self._rejectRequest(reqId,
                                "Error: no content field provided in %s" % str(msgDict),
                                'Received msg without a content field: %s' % str(msgText))
            return

        # Must have an academic year:
        try:
            academicYear = contentDict['academic_year']
        except (KeyError, TypeError):
            self._rejectRequest(reqId,
                                "Error: academic year not provided in %s" % str(msgDict),
                                'Received msg without academic year in content field: %s' % str(msgText))
            return

        # Must have a quarter:
        try:
            quarter = contentDict['quarter']
        except (KeyError, TypeError):
            self._rejectRequest(reqId,
                                "Error: quarter not provided in %s" % str(msgDict),
                                'Received msg without quarter in content field: %s' % str(msgText))
            return

        # Get a list of records, each holding:
        #    course_display_name,
        #    course_catalog_name,
        #    is_internal
        resultArr = self.executeCourseInfoQuery(academicYear, quarter, reqId)
        if resultArr is None:
            # Failure was already reported to requester and log.
            return

        # Turn result into an HTML table:
        htmlRes = self.buildHtmlTableFromQueryResult(resultArr)

        # Note that we pass the message type 'resp'
        # to publish(), and that we specify that the
        # msg ID is to be the same as the incoming request.
        self.bus.publish(htmlRes,
                         CoursesGivenQuarter.module_topic,
                         msgType='resp',
                         msgId=reqId)

    def executeCourseInfoQuery(self, academicYear, quarter, reqId='NULL'):
        '''
        Query table CourseInfo for the courses of one quarter.

        The two values originate from a bus message and are
        spliced into the SQL text, so they are first checked
        against a whitelist of harmless characters.

        :param academicYear: value to match against column academic_year
        :type academicYear: {string | int}
        :param quarter: value to match against column quarter
        :type quarter: string
        :param reqId: id of the request being served; used as the
            message id of an error response, should one be needed.
        :type reqId: string
        :return: list of result rows, or None if the values were
            rejected or the database call failed. In the None case
            an error response has been published, and the error logged.
        :rtype: {[tuple] | None}
        '''
        for fieldName, value in (('academic year', academicYear), ('quarter', quarter)):
            if not self._isSafeSqlValue(value):
                self._rejectRequest(reqId,
                                    "Error: illegal characters in %s: %s" % (fieldName, repr(value)),
                                    "Refused to query with unsafe %s value: %s" % (fieldName, repr(value)))
                return None

        courseQuery = ("SELECT course_display_name,"
                       "course_catalog_name,"
                       "is_internal "
                       "FROM CourseInfo "
                       "WHERE academic_year = '%s' "
                       " AND quarter = '%s' "
                       ";") % (academicYear, quarter)
        try:
            # Drain the result iterator inside the try, so that
            # errors surfacing during row retrieval are caught as well:
            return list(self.mysqldb.query(courseQuery))
        except Exception as e:
            self._rejectRequest(reqId,
                                "Error: Call to database returned an error: '%s'" % repr(e),
                                "Call to MySQL returned an error: '%s'" % repr(e))
            return None

    def returnError(self, req_id, errMsg):
        '''
        Publish an error message as a response to a request.

        :param req_id: id of the request that caused the error
        :type req_id: string
        :param errMsg: error text to deliver
        :type errMsg: string
        '''
        self.bus.publish(errMsg,
                         CoursesGivenQuarter.module_topic,
                         msgId=req_id,
                         msgType='resp')

    def buildHtmlTableFromQueryResult(self, resTupleArr):
        '''
        Render query result rows as an HTML table with a header row.

        :param resTupleArr: rows of (course_display_name,
            course_catalog_name, is_internal)
        :type resTupleArr: [(string, string, int)]
        :return: HTML table; only the header row if there are no results.
        :rtype: string
        '''
        headerRow = '<table border=1><tr><td><b>Course</b></td><td><b>Description</b></td><td><b>Internal-Only</b></td></tr>'
        rowStrs = ["<tr><td>%s</td><td>%s</td><td>%s</td></tr>" %
                   (courseDisplayName, courseCatalogName, isInternal)
                   for (courseDisplayName, courseCatalogName, isInternal) in resTupleArr]
        return headerRow + ' '.join(rowStrs) + '</table>'

    def close(self):
        '''
        Close the MySQL connection. Best effort: failures are ignored.
        '''
        try:
            self.mysqldb.close()
        except Exception:
            # Nothing useful can be done if closing fails.
            pass

    def _rejectRequest(self, reqId, clientMsg, logMsg):
        '''
        Publish clientMsg as an error response for reqId, and write
        logMsg to the bus error log.
        '''
        self.returnError(reqId, clientMsg)
        self.bus.logError(logMsg)

    @staticmethod
    def _isSafeSqlValue(value):
        '''
        Return True if every character of value's string form is
        alphanumeric or one of _SAFE_SQL_PUNCTUATION.
        '''
        # Wrapping in a 1-tuple keeps '%' from unpacking tuple values:
        text = '%s' % (value,)
        return all(ch.isalnum() or ch in CoursesGivenQuarter._SAFE_SQL_PUNCTUATION
                   for ch in text)
class UserCountryTableCreator(object):
    '''
    Creates and fills table UserCountry in database Edx. Each
    distinct (anon_screen_name, ip_country) pair found in table
    EventXtract is expanded into the two-letter code, three-letter
    code, and name of the country.
    '''

    DEST_TABLE = 'UserCountry'

    # Column name/spec pairs in the order in which the columns are
    # to appear in the table. Kept as a sequence of pairs, because
    # a dict literal would lose this order on Python 2.
    DEST_TABLE_COLUMNS = (
        ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
        ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
        ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
        ('country', 'varchar(255) NOT NULL DEFAULT ""'),
    )

    def __init__(self, user, pwd):
        '''
        Connect to MySQL database Edx, and replace any existing
        UserCountry table with a new, empty one.

        :param user: MySQL user name
        :type user: string
        :param pwd: MySQL password
        :type pwd: string
        '''
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        self.db.createTable(UserCountryTableCreator.DEST_TABLE,
                            OrderedDict(UserCountryTableCreator.DEST_TABLE_COLUMNS))

    def fillTable(self):
        '''
        Look up the country information for every distinct
        (anon_screen_name, ip_country) pair in EventXtract, and
        bulk-insert the results into the destination table. Pairs
        whose country cannot be looked up are reported on stderr,
        and skipped.
        '''
        values = []
        for (user, ip3LetterCountry) in self.db.query("SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"):
            try:
                (twoLetterCode, threeLetterCode, country) = self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError, TypeError, KeyError) as e:
                sys.stderr.write("Could not look up one country from (%s/%s): %s\n" % (user, ip3LetterCountry, repr(e)))
                continue
            values.append(('%s' % user,
                           '%s' % twoLetterCode,
                           '%s' % threeLetterCode,
                           '%s' % country))

        # Same names, same order as used for creating the table:
        colNameTuple = tuple(colName for (colName, _colSpec) in UserCountryTableCreator.DEST_TABLE_COLUMNS)
        self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple, values)

    def makeIndex(self):
        '''
        Create indexes on columns anon_screen_name and
        three_letter_country via the stored procedure
        createIndexIfNotExists.
        '''
        self.db.execute("CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);")
        self.db.execute("CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);")

    def close(self):
        '''
        Close the MySQL connection.
        '''
        self.db.close()