def tearDown(self):
    """Open a connection to the 'unittest' database and close it again.

    No tables are removed here: dropping 'StudentmoduleExcerpt' and
    'ActivityGrade' at this point was observed to hang.
    """
    connection = MySQLDB(user='******', passwd='', db='unittest')
    self.db = connection
    connection.close()
Esempio n. 2
0
    def testWithMySQLPassword(self):
        """Exercise MySQLDB against a password-protected 'unittest' user.

        Sets a password for unittest@localhost, checks that a login
        without password raises ValueError, logs in with the password,
        runs a small query plus the bulk insert test, and finally
        resets the password to the empty string.
        """
        try:
            # The SET PASSWORD syntax depends on the server version:
            setPwdCmd = ("SET PASSWORD FOR unittest@localhost = 'foobar'"
                         if self.mysql_ge_5_7
                         else "SET PASSWORD FOR unittest@localhost = PASSWORD('foobar')")
            self.mysqldb.execute(setPwdCmd)
            self.mysqldb.close()

            # Logging in without the password must now fail:
            with self.assertRaises(ValueError):
                self.mysqldb = MySQLDB(host='localhost', user='******', db='unittest')

            # Logging in with the password must work:
            self.mysqldb = MySQLDB(host='localhost', user='******', passwd='foobar', db='unittest')

            # Run a test query over the new connection:
            self.buildSmallDb()
            rowIter = self.mysqldb.query("SELECT col2 FROM unittest WHERE col1 = 10;")
            self.assertEqual(rowIter.next(), 'col1')

            # Bulk insert takes a different path with vs. without pwd:
            self.testBulkInsert()
        finally:
            # Make sure to remove the pwd from user unittest again,
            # so that other tests will run successfully:
            clearPwdCmd = ("SET PASSWORD FOR unittest@localhost = ''"
                           if self.mysql_ge_5_7
                           else "SET PASSWORD FOR unittest@localhost = PASSWORD('')")
            self.mysqldb.execute(clearPwdCmd)
Esempio n. 3
0
    def testQueryIterator(self):
        """Iterate over query results with the default and the DICT cursor.

        With the default cursor, the first three rows are compared by
        position against the expected tuples. With the dict cursor every
        row must be a dict; col2 is checked for rows whose col1 is
        10, 20, or 30.
        """
        self.buildSmallDb()

        # Expected row for each of the first three positions:
        expectedByPosition = {0: (10, 'col1'),
                              1: (20, 'col2'),
                              2: (30, 'col3')}
        rows = self.mysqldb.query('SELECT col1,col2 FROM unittest')
        for position, row in enumerate(rows):
            if position in expectedByPosition:
                self.assertEqual(expectedByPosition[position], row)

        # Reconnect, this time asking for the dict cursor:
        self.mysqldb.close()
        self.mysqldb = MySQLDB(host='localhost',
                               user='******',
                               db='unittest',
                               cursor_class=Cursors.DICT)

        # Expected col2 value for each known col1 value:
        col2ByCol1 = {10: 'col1', 20: 'col2', 30: 'col3'}
        for row in self.mysqldb.query('SELECT col1,col2 FROM unittest'):
            self.assertIsInstance(row, dict)
            col1Value = row['col1']
            if col1Value in col2ByCol1:
                self.assertEqual(row['col2'], col2ByCol1[col1Value])
    def setUp(self):
        """Prepare the 'unittest' database tables used by the tests.

        Stores the comma-separated column names in self.allColNames,
        then (re)creates and fills StudentmoduleExcerpt, creates and
        empties ActivityGrade, and (re)creates and fills UserGrade.
        The connection is closed before returning, also when one of
        the preparation steps raises.
        """
        testCls = TestAddAnonToActivityGrade
        self.allColNames = ','.join(testCls.studentmoduleExcerptColNames)

        self.db = MySQLDB(user='******', passwd='', db='unittest')
        try:
            self.db.dropTable('StudentmoduleExcerpt')
            self.db.createTable('StudentmoduleExcerpt',
                                testCls.studentmoduleExcerptSchema,
                                temporary=False)
            self.db.bulkInsert('StudentmoduleExcerpt',
                               testCls.studentmoduleExcerptColNames,
                               testCls.studentmoduleExcerptValues)

            # ActivityGrade is created with the excerpt's schema:
            self.db.createTable('ActivityGrade',
                                testCls.studentmoduleExcerptSchema)
            # Make sure there isn't left over content (if the table existed):
            self.db.truncateTable('ActivityGrade')

            # Rudimentary UserGrade table:
            self.db.dropTable('UserGrade')
            self.db.createTable('UserGrade',
                                testCls.userGradeExcerptSchema,
                                temporary=False)
            self.db.bulkInsert('UserGrade',
                               testCls.userGradeExcerptColNames,
                               testCls.userGradeExcerptValues)
        finally:
            # Don't leak the connection if a step above failed:
            self.db.close()
    def __init__(self, extIdsFileName):
        """Connect to MySQL db 'Misc' and run the ext-id processing steps.

        The MySQL password is taken from the first line of
        $HOME/.ssh/mysql_root if that file can be read; otherwise
        None is passed as the password.

        :param extIdsFileName: handed to loadExtIds()
        """
        pwd = None
        user = '******'
        homeDir = os.getenv('HOME')
        if homeDir is not None:
            rootPwdFile = os.path.join(homeDir, '.ssh/mysql_root')
            try:
                # Need to access MySQL db as its 'root':
                with open(rootPwdFile) as fd:
                    pwd = fd.readline().strip()
                # Switch user to 'root' b/c from now on it will need to be root:
                user = '******'
            except IOError:
                # No .ssh subdir in the home dir, or no mysql_root inside .ssh:
                pwd = None

        self.db = MySQLDB(user=user, passwd=pwd, db='Misc')

        self.makeTmpExtsTable()
        self.loadExtIds(extIdsFileName)

        # Only the generated file name is needed. Closing the file
        # deletes it, so that MySQL is willing to write to that path.
        # Yes, that's a race condition. But this is an admin script,
        # run by one person:
        tmpFile = tempfile.NamedTemporaryFile(prefix='extsIntsScreenNames',
                                              suffix='.csv',
                                              delete=True)
        tmpFile.close()
        csvPath = tmpFile.name
        self.findScreenNames(csvPath)
        self.computeAnonFromScreenNames(csvPath)
 def __init__(self, logFile, uid, pwd, tsvFileName, screenNamePos):
     '''
     Store the run parameters and open a MySQL connection to db 'EdxPrivate'.

     @param logFile: file where log entries will be appended.
     @type logFile: String
     @param uid: MySQL user under which to log in. Assumed to be other than None
     @type uid: String
     @param pwd: MySQL password for user uid. May be None, in which
            case no password is handed to the MySQL wrapper.
     @type pwd: {String | None}
     @param tsvFileName: name of TSV file where rows of edxprod's
            certificates_generatedcertificate table are located.
            It is assumed that the caller verified existence and
            readability of this file.
     @type tsvFileName: String
     @param screenNamePos: Zero-origin position of the screen name column
            in the TSV file from certificates_generatedcertificate
     @type screenNamePos: int
     '''
     self.logFile = logFile
     self.uid = uid
     self.pwd = pwd
     self.tsvFileName = tsvFileName
     self.screenNamePos = screenNamePos

     # Only pass a password along when one was provided:
     connectArgs = {'user': uid, 'db': 'EdxPrivate'}
     if pwd is not None:
         connectArgs['passwd'] = pwd
     self.mysqldb = MySQLDB(**connectArgs)
Esempio n. 7
0
    def __init__(self, 
                 bsonFileName, 
                 mysqlDbObj=None, 
                 forumTableName='contents', 
                 allUsersTableName='EdxPrivate.UserGrade',
                 anonymize=True,
                 allowAnonScreenName=False):
        '''
        Given a .bson file containing OpenEdX Forum entries, anonymize the entries (if desired),
        and place them into a MySQL table. The constructor also sets up logging
        and calls prepDatabase().

        :param bsonFileName: full path to the .bson table. Set to None if instantiating
            for unit testing.
        :type bsonFileName: String
        :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized entries are
            to be placed. If None, a new such object is created into MySQL db 'EdxForum'
        :type mysqlDbObj: MySQLDB
        :param forumTableName: name of table into which anonymized Forum entries are to be placed
        :type forumTableName: String
        :param allUsersTableName: fully qualified name of table listing all in-the-clear names
            of users who post to the Forum. Used to redact their names from their own posts.
        :type allUsersTableName: String
        :param anonymize: If true, Forum post entries in the MySQL table will be anonymized
        :type anonymize: bool
        :param allowAnonScreenName: if True, then occurrences of poster's name in
            post bodies are replaced by <redacName_<anon_screen_name>>, where anon_screen_name
            is the hash used in other tables of the OpenEdX data.
        :type allowAnonScreenName: bool
        '''

        self.forumDbName = 'EdxForum'
        self.bsonFileName = bsonFileName
        self.forumTableName = forumTableName
        self.allUsersTableName = allUsersTableName
        self.anonymize = anonymize
        self.allowAnonScreenName = allowAnonScreenName

        if mysqlDbObj is not None:
            # Caller (e.g. a unit test) supplied the connection:
            self.mydb = mysqlDbObj
        else:
            # Regular run: connect as the user who started this process.
            self.mysql_passwd = self.getMySQLPasswd()
            self.mysql_dbhost = 'localhost'
            self.mysql_user = getpass.getuser()
            # NOTE(review): the user name is passed under keyword 'mySQLUser';
            # confirm this matches the MySQLDB constructor's parameter name.
            self.mydb = MySQLDB(mySQLUser=self.mysql_user,
                                passwd=self.mysql_passwd,
                                db=self.forumDbName)

        self.counter = 0
        self.userCache = {}
        self.userSet = set()

        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
        self.setupLogging()
        self.prepDatabase()
Esempio n. 8
0
 def setUp(self):
     """Connect to the local 'unittest' database and note the server version.

     Raises RuntimeError with TestPymysqlUtils.err_msg when the class
     flagged the environment as unusable. Fails the test if the
     connection attempt raises ValueError. Sets self.mysql_ge_5_7 to
     True for MySQL 5.7 and later, else to False.
     """
     if not TestPymysqlUtils.env_ok:
         raise RuntimeError(TestPymysqlUtils.err_msg)

     try:
         self.mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
     except ValueError as e:
         self.fail(str(e) + " (For unit testing, localhost MySQL server must have user 'unittest' without password, and a database called 'unittest')")

     # Make MySQL version more convenient to check:
     version = TestPymysqlUtils
     self.mysql_ge_5_7 = bool((version.major == 5 and version.minor >= 7)
                              or version.major >= 8)
 def __init__(self, user, pwd):
     """Connect to MySQL db 'Edx' and create a fresh destination table.

     Drops UserCountryTableCreator.DEST_TABLE, then creates it with
     the four country columns listed below.

     :param user: MySQL user name for the connection
     :param pwd: MySQL password for that user
     """
     self.ipCountryXlater = IpCountryDict()
     self.user = user
     self.pwd = pwd
     self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
     self.db.dropTable(UserCountryTableCreator.DEST_TABLE)

     # Build the schema from a list of pairs: an OrderedDict that is
     # initialized from a dict literal only preserves the column
     # order on Python 3.7 and later.
     schema = OrderedDict([
         ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
         ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
         ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
         ('country', 'varchar(255) NOT NULL DEFAULT ""'),
     ])
     self.db.createTable(UserCountryTableCreator.DEST_TABLE, schema)
 def __init__(self, extIdsFileName):
     """Connect to MySQL db 'Misc' and run the ext-id processing steps.

     The MySQL password is taken from the first line of
     $HOME/.ssh/mysql_root if that file can be read; otherwise
     None is passed as the password.

     :param extIdsFileName: handed to loadExtIds()
     """
     pwd = None
     user = '******'
     homeDir = os.getenv('HOME')
     if homeDir is not None:
         rootPwdFile = os.path.join(homeDir, '.ssh/mysql_root')
         try:
             # Need to access MySQL db as its 'root':
             with open(rootPwdFile) as fd:
                 pwd = fd.readline().strip()
             # Switch user to 'root' b/c from now on it will need to be root:
             user = '******'
         except IOError:
             # No .ssh subdir in the home dir, or no mysql_root inside .ssh:
             pwd = None

     self.db = MySQLDB(user=user, passwd=pwd, db='Misc')

     self.makeTmpExtsTable()
     self.loadExtIds(extIdsFileName)

     # Only the generated file name is needed. Closing the file
     # deletes it, so that MySQL is willing to write to that path.
     # Yes, that's a race condition. But this is an admin script,
     # run by one person:
     tmpFile = tempfile.NamedTemporaryFile(prefix='extsIntsScreenNames', suffix='.csv', delete=True)
     tmpFile.close()
     csvPath = tmpFile.name
     self.findScreenNames(csvPath)
     self.computeAnonFromScreenNames(csvPath)
Esempio n. 11
0
    def setUpClass(cls):
        """Record the test host, user, password, and unittest db name on the class.

        Opens a connection to 'information_schema' on the configured
        test host. On localhost the db name is fixed to 'Unittest' and
        cls.mysql_pwd is reset to the empty string; on any other host
        the name is obtained from UnittestDbFinder. The connection is
        closed before returning, also when the lookup raises.
        """
        super(AuxTableCopyTester, cls).setUpClass()

        # Read config file to see which MySQL server test_host we should
        # run the tests on. If setup.py does not exist, copy
        # setupSample.py to setup.py:
        config_info = ConfigInfo()
        cls.utils = Utilities()

        test_host = config_info.test_default_host
        user = config_info.test_default_user

        cls.test_host = test_host
        cls.user = user

        mysql_pwd = cls.mysql_pwd = cls.utils.get_db_pwd(test_host,
                                                         unittests=True)

        db = AuxTableCopyTester.db = MySQLDB(user=user,
                                             passwd=mysql_pwd,
                                             db='information_schema',
                                             host=test_host)
        try:
            # On localhost we expect a db 'Unittest'. Elsewhere,
            # ensure there is a unittest db for us to work in.
            # We'll delete it later:
            if test_host == 'localhost':
                cls.db_name = 'Unittest'
                cls.mysql_pwd = ''
            else:
                cls.db_name = UnittestDbFinder(db).db_name
        finally:
            # Release the connection even if the db lookup failed:
            db.close()
 def tearDown(self):
     """Open a connection to the 'unittest' database and close it again.

     No tables are removed here: dropping 'StudentmoduleExcerpt' and
     'ActivityGrade' at this point was observed to hang.
     """
     connection = MySQLDB(user='******', passwd='', db='unittest')
     self.db = connection
     connection.close()
 def setUp(self):
     """Prepare the 'unittest' database tables used by the tests.

     Stores the comma-separated column names in self.allColNames,
     then (re)creates and fills StudentmoduleExcerpt, creates and
     empties ActivityGrade, and (re)creates and fills UserGrade.
     The connection is closed before returning, also when one of
     the preparation steps raises.
     """
     testCls = TestAddAnonToActivityGrade
     self.allColNames = ','.join(testCls.studentmoduleExcerptColNames)

     self.db = MySQLDB(user='******', passwd='', db='unittest')
     try:
         self.db.dropTable('StudentmoduleExcerpt')
         self.db.createTable('StudentmoduleExcerpt',
                             testCls.studentmoduleExcerptSchema,
                             temporary=False)
         self.db.bulkInsert('StudentmoduleExcerpt',
                            testCls.studentmoduleExcerptColNames,
                            testCls.studentmoduleExcerptValues)

         # ActivityGrade is created with the excerpt's schema:
         self.db.createTable('ActivityGrade', testCls.studentmoduleExcerptSchema)
         # Make sure there isn't left over content (if the table existed):
         self.db.truncateTable('ActivityGrade')

         # Rudimentary UserGrade table:
         self.db.dropTable('UserGrade')
         self.db.createTable('UserGrade',
                             testCls.userGradeExcerptSchema,
                             temporary=False)
         self.db.bulkInsert('UserGrade',
                            testCls.userGradeExcerptColNames,
                            testCls.userGradeExcerptValues)
     finally:
         # Don't leak the connection if a step above failed:
         self.db.close()
 def __init__(self, user, pwd):
     """Connect to MySQL db 'Edx' and issue the destination table creation.

     Unlike a full rebuild, the table is not dropped first: it is
     expected to exist already, filled with all anon_screen_name
     and countries up to the previous load.

     :param user: MySQL user name for the connection
     :param pwd: MySQL password for that user
     """
     self.ipCountryXlater = IpCountryDict()
     self.user = user
     self.pwd = pwd
     self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')

     # Build the schema from a list of pairs: an OrderedDict that is
     # initialized from a dict literal only preserves the column
     # order on Python 3.7 and later.
     schema = OrderedDict([
         ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
         ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
         ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
         ('country', 'varchar(255) NOT NULL DEFAULT ""'),
     ])
     # Make sure table exists:
     self.db.createTable(UserCountryTableCreator.DEST_TABLE, schema)
 def ensureOpenMySQLDb(self):
     '''
     Try to open a MySQL connection for self.currUser and return it.

     First attempt: read the password from the first line of
     /home/<currUser>/.ssh/mysql and connect with it. If anything in
     that attempt fails, a second attempt is made without a password.
     If that fails too, the error's repr is stored in self.dbError
     and self.mysqlDb is set to None.

     @return: the open MySQLDB instance, or None if both attempts failed.
     '''
     try:
         with open('/home/%s/.ssh/mysql' % self.currUser, 'r') as fd:
             self.mySQLPwd = fd.readline().strip()
             # NOTE(review): this attempt reads the db name from
             # self.mainThread.defaultDb, the fallback below from
             # self.defaultDb -- confirm both are intended.
             self.mysqlDb = MySQLDB(user=self.currUser,
                                    passwd=self.mySQLPwd,
                                    db=self.mainThread.defaultDb)
     except Exception:
         try:
             # Try w/o a pwd:
             self.mySQLPwd = None
             self.mysqlDb = MySQLDB(user=self.currUser, db=self.defaultDb)
         except Exception as e:
             # Remember the error msg for later. repr() replaces the
             # backtick syntax, which no longer exists in Python 3:
             self.dbError = repr(e)
             self.mysqlDb = None
     return self.mysqlDb
Esempio n. 16
0
    def __init__(self, majors_table='sankey'):
        '''
        Connect to MySQL as the user who started this process, fetch
        the nodes and links, and plot them as a Sankey diagram titled
        "Majors Transitions".

        :param majors_table: stored in self.majors_table and passed
            as the 'db' argument of the MySQL connection.
        :type majors_table: String
        '''
        self.majors_table = majors_table
        self.mysql_passwd = self.getMySQLPasswd()
        self.mysql_dbhost = 'localhost'
        # User who started this process:
        self.mysql_user = getpass.getuser()

        connectArgs = dict(user=self.mysql_user,
                           passwd=self.mysql_passwd,
                           db=self.majors_table)
        self.mydb = MySQLDB(**connectArgs)

        nodes, links = self.get_nodes_and_links()
        SankeyDiagram.plot_sankey(nodes, links, plot_title="Majors Transitions")
Esempio n. 17
0
 def log_into_mysql(self, user, db_pwd, db=None, host='localhost', **kwargs):
     '''
     Open a MySQL connection and switch its session time zone to UTC.

     If the login fails with a ValueError whose text contains
     "OperationalError(1049," (database does not exist yet), a
     connection to 'information_schema' is opened instead, and the
     database named by self.config_info.canvas_db_aux is created.

     :param user: MySQL user name
     :param db_pwd: password for that user
     :param db: database to use after login
     :param host: MySQL server host
     :param kwargs: passed through to MySQLDB on the first login attempt
     :return: the open MySQLDB instance
     :raise DatabaseError: if the connection cannot be opened
     '''
     try:
         # Try logging in, specifying the database in which all the tables
         # will be created:
         conn = MySQLDB(user=user, passwd=db_pwd, db=db, host=host, **kwargs)
     except ValueError as e:
         # Anything other than a not-yet-existing db is fatal:
         if "OperationalError(1049," not in str(e):
             raise DatabaseError(f"Cannot open Canvas database:\n{repr(e)}")
         # Log in, specifying an always present db to 'use':
         conn = MySQLDB(user=user, passwd=db_pwd, db='information_schema', host=host)
         # Create the db:
         conn.execute('CREATE DATABASE %s;' % self.config_info.canvas_db_aux)
     except Exception as e:
         raise DatabaseError(f"Cannot open Canvas database:\n{repr(e)}")

     # Work in UTC, b/c default on Mac MySQL 8 is local time,
     # on Centos MySQL 5.7 is UTC; it's a mess:
     (err, _warn) = conn.execute('SET @@session.time_zone = "+00:00"')
     if err is not None:
         self.log_warn(f"Cannot set session time zone to UTC: {repr(err)}")

     return conn
 def __init__(self, user, pwd):
     """Connect to MySQL db 'Edx' and rebuild table UserCountry from scratch.

     The table is dropped and then created empty with the four
     country columns; progress is reported on stdout.

     :param user: MySQL user name for the connection
     :param pwd: MySQL password for that user
     """
     self.ipCountryXlater = IpCountryDict()
     self.user = user
     self.pwd = pwd
     self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')

     # Start from a clean slate:
     self.db.dropTable('UserCountry')

     createTableSql = '''CREATE TABLE UserCountry (
                      anon_screen_name varchar(40) NOT NULL DEFAULT "",
                      two_letter_country varchar(2) NOT NULL DEFAULT "",
                      three_letter_country varchar(3) NOT NULL DEFAULT "",
                      country varchar(255) NOT NULL DEFAULT ""
                      ) ENGINE=MyISAM;
                      '''
     print("Creating table UserCountry...")
     self.db.execute(createTableSql)
     print("Done creating table UserCountry.")
Esempio n. 19
0
    def testCreateTempTable(self):
        """Check that a temporary table exists only within its session.

        Creates temporary table 'myTbl', verifies that it can be
        described, then opens a new connection and verifies that
        describing the table now raises ValueError.
        """
        tmpTblSchema = {
            'col1': 'INT',
            'col2': 'varchar(255)',
            'col3': 'FLOAT',
            'col4': 'TEXT',
            # 'col5' : 'JSON'  # Only works MySQL 5.7 and up.
        }
        self.mysqldb.createTable('myTbl', tmpTblSchema, temporary=True)

        # Check that tbl exists.
        # NOTE: can't use query to mysql.informationschema,
        # b/c temp tables aren't listed there.
        try:
            # Will return some tuple; we don't care what
            # exactly, as long as the cmd doesn't fail:
            self.mysqldb.query('DESC myTbl').next()
        except Exception:
            self.fail('Temporary table not found after creation.')

        # Start a new session, which should remove the table:
        self.mysqldb.close()
        try:
            self.mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
        except ValueError as e:
            self.fail(str(e) + "Could not re-establish MySQL connection.")

        # Again, mysql.informationschema is of no use for temp
        # tables; describing the table must now fail instead:
        try:
            self.mysqldb.query('DESC myTbl').next()
        except ValueError:
            pass
        else:
            self.fail("Temporary table did not disappear with session exit.")
Esempio n. 20
0
    def __init__(self, user, db, table, totalRows=None):
        '''
        Write the first-seen (anon_screen_name, ip) pair of every
        anon_screen_name in table EventIp to /tmp/anonIps.csv.

        The MySQL password is read from $HOME/.ssh/mysql. Rows are
        pulled in batches of UniqueAnonIpExtractor.BATCH_SIZE; names
        already present in UniqueAnonIpExtractor.seenAnons are skipped.
        Exits the process via sys.exit() if EventIp is empty.

        :param user: MySQL user name
        :type user: String
        :param db: name of the MySQL database to connect to
        :type db: String
        :param table: currently unused; the queries read from EventIp
        :type table: String
        :param totalRows: maximum number of rows to process. None
            means all rows of EventIp.
        :type totalRows: {int | None}
        '''
        home = os.environ['HOME']
        with open(os.path.join(home, '.ssh/mysql')) as pwdFd:
            pwd = pwdFd.read().strip()

        batchSize = UniqueAnonIpExtractor.BATCH_SIZE
        seenAnons = UniqueAnonIpExtractor.seenAnons

        db = MySQLDB(db=db, user=user, passwd=pwd)
        try:
            # Number of rows pulled from EventIp:
            rowCount = 0

            # First row to get in the select statement:
            nextBatchStartRow = -batchSize

            with open('/tmp/anonIps.csv', 'w') as fd:
                fd.write('anon_screen_name,ip\n')
                numRecords = db.query('SELECT count(*) from EventIp').next()
                if numRecords == 0:
                    sys.exit()
                if totalRows is None:
                    totalRows = numRecords
                while rowCount < numRecords and rowCount < totalRows:
                    nextBatchStartRow += batchSize
                    rowsInBatch = 0
                    for (anon_screen_name, ip) in db.query('SELECT anon_screen_name, event_ip from EventIp LIMIT %s,%s' % \
                                                           (nextBatchStartRow, batchSize)):
                        rowsInBatch += 1
                        # The anon_screen_name in the db could actually be NULL,
                        # a.k.a. None. Ignore those, and names already written:
                        if anon_screen_name is not None and anon_screen_name not in seenAnons:
                            fd.write(anon_screen_name + ',' + ip + '\n')
                            seenAnons[anon_screen_name] = 1
                        rowCount += 1
                        if (rowCount % batchSize) == 0:
                            print("Did %s rows." % rowCount)
                        if rowCount >= totalRows:
                            break
                    if rowsInBatch == 0:
                        # Table holds fewer rows than counted (e.g. rows were
                        # deleted meanwhile); stop rather than loop forever:
                        break
                print('Finished %s rows; %s unique anon_screen_names' %
                      (rowCount, len(seenAnons)))
        finally:
            # The connection is local to this constructor; release it:
            db.close()
Esempio n. 21
0
    def __init__(self,
                 bsonFileName,
                 mysqlDbObj=None,
                 forumTableName='contents',
                 allUsersTableName='EdxPrivate.UserGrade',
                 anonymize=True,
                 allowAnonScreenName=False):
        '''
        Given a .bson file containing OpenEdX Forum entries, anonymize the entries (if desired),
        and place them into a MySQL table. The constructor also sets up logging
        and calls prepDatabase().

        :param bsonFileName: full path to the .bson table. Set to None if instantiating
            for unit testing.
        :type bsonFileName: String
        :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized entries are
            to be placed. If None, a new such object is created into MySQL db 'EdxForum'
        :type mysqlDbObj: MySQLDB
        :param forumTableName: name of table into which anonymized Forum entries are to be placed
        :type forumTableName: String
        :param allUsersTableName: fully qualified name of table listing all in-the-clear names
            of users who post to the Forum. Used to redact their names from their own posts.
        :type allUsersTableName: String
        :param anonymize: If true, Forum post entries in the MySQL table will be anonymized
        :type anonymize: bool
        :param allowAnonScreenName: if True, then occurrences of poster's name in
            post bodies are replaced by <redacName_<anon_screen_name>>, where anon_screen_name
            is the hash used in other tables of the OpenEdX data.
        :type allowAnonScreenName: bool
        '''

        self.forumDbName = 'EdxForum'
        self.bsonFileName = bsonFileName
        self.forumTableName = forumTableName
        self.allUsersTableName = allUsersTableName
        self.anonymize = anonymize
        self.allowAnonScreenName = allowAnonScreenName

        if mysqlDbObj is not None:
            # Caller (e.g. a unit test) supplied the connection:
            self.mydb = mysqlDbObj
        else:
            # Regular run: connect as the user who started this process.
            self.mysql_passwd = self.getMySQLPasswd()
            self.mysql_dbhost = 'localhost'
            self.mysql_user = getpass.getuser()
            self.mydb = MySQLDB(user=self.mysql_user,
                                passwd=self.mysql_passwd,
                                db=self.forumDbName)

        self.counter = 0
        self.userCache = {}
        self.userSet = set()

        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
        self.setupLogging()
        self.prepDatabase()
Esempio n. 22
0
 def setUp(self):
     """Reset the Mongo and MySQL test data and build three forum scrubbers.

     The scrubbers share the MySQL connection and table names; they
     differ only in the anonymize / allowAnonScreenName options.
     """
     self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum")
     # Fill the little MongoDB with test JSON lines
     self.resetMongoTestDb()

     self.mysqldb = MySQLDB(mySQLUser='******', db='unittest')
     # Start with an empty result MySQL table for each test:
     self.mysqldb.dropTable('contents')
     # Fill the fake UserGrade table with records of course participants:
     self.resetMySQLUserListDb()

     # Arguments common to all three scrubber instances:
     sharedArgs = dict(mysqlDbObj=self.mysqldb,
                       forumTableName='contents',
                       allUsersTableName='unittest.UserGrade')

     # Passing None as the bson file name makes the class
     # understand that it's being instantiated for a unit test:
     self.forumScrubberAnonymized = EdxForumScrubber(None, **sharedArgs)
     self.forumScrubberRelatable = EdxForumScrubber(None, allowAnonScreenName=True, **sharedArgs)
     self.forumScrubberClear = EdxForumScrubber(None, anonymize=False, **sharedArgs)
 def __init__(self, user, pwd):
     """Connect to MySQL db 'Edx' and create a fresh destination table.

     Drops UserCountryTableCreator.DEST_TABLE, then creates it with
     the four country columns listed below.

     :param user: MySQL user name for the connection
     :param pwd: MySQL password for that user
     """
     self.ipCountryXlater = IpCountryDict()
     self.user = user
     self.pwd = pwd
     self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
     self.db.dropTable(UserCountryTableCreator.DEST_TABLE)

     # Build the schema from a list of pairs: an OrderedDict that is
     # initialized from a dict literal only preserves the column
     # order on Python 3.7 and later.
     schema = OrderedDict([
         ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
         ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
         ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
         ('country', 'varchar(255) NOT NULL DEFAULT ""'),
     ])
     self.db.createTable(UserCountryTableCreator.DEST_TABLE, schema)
 def setUp(self):
     """Create a CourseCSVServer in testing mode and connect to MySQL.

     Fails the test if the connection to the local 'unittest'
     database raises ValueError.
     """
     # Neither an application nor an HTTP request object is supplied:
     self.courseServer = CourseCSVServer(None, None, testing=True)
     try:
         self.mysqldb = MySQLDB(host="localhost", port=3306, user="******", db="unittest")
     except ValueError as e:
         requirementsHint = " (For unit testing, localhost MySQL server must have user 'unittest' without password, and a database called 'unittest')"
         self.fail(str(e) + requirementsHint)
 def __init__(self, uid, pwd, db='Edx'):
     '''
     Connect to the MySQL wrapper, build the column list for the
     SELECT call, and start processing: pullRowByRow() is invoked
     from this constructor.

     @param uid: MySQL user under which to log in. Assumed to be other than None
     @type uid: String
     @param pwd: MySQL password for user uid. May be None, in which
            case no password is handed to the MySQL wrapper.
     @type pwd: {String | None}
     @param db: name of the MySQL database to connect to
     @type db: String
     '''
     self.db = db
     if pwd is None:
         self.mysqldbStudModule = MySQLDB(user=uid, db=db)
     else:
         self.mysqldbStudModule = MySQLDB(user=uid, passwd=pwd, db=db)
     # Create a string with the parameters of the SELECT call,
     # (activity_grade_id,student_id,...):
     self.colSpec = ','.join(AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES)

     self.pullRowByRow()
    def __init__(self, uid, pwd, db='Edx'):
        '''
        Connect to the MySQL wrapper, build the column list for the
        SELECT call, and start processing: pullRowByRow() is invoked
        from this constructor.

        @param uid: MySQL user under which to log in. Assumed to be other than None
        @type uid: String
        @param pwd: MySQL password for user uid. May be None, in which
               case no password is handed to the MySQL wrapper.
        @type pwd: {String | None}
        @param db: name of the MySQL database to connect to
        @type db: String
        '''
        self.db = db
        if pwd is None:
            self.mysqldbStudModule = MySQLDB(user=uid, db=db)
        else:
            self.mysqldbStudModule = MySQLDB(user=uid, passwd=pwd, db=db)
        # Create a string with the parameters of the SELECT call,
        # (activity_grade_id,student_id,...):
        self.colSpec = ','.join(AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES)

        self.pullRowByRow()
Esempio n. 27
0
    def setUpClass(cls):
        """Record test host, user, and unittest db name on CanvasUtilsTests.

        On localhost the db name is fixed to 'Unittest'. On any other
        host a connection is opened and the name is obtained from
        UnittestDbFinder; failure of that lookup is re-raised as an
        AssertionError, and the connection is closed either way.
        """
        super(CanvasUtilsTests, cls).setUpClass()

        # Read config file to see which MySQL server test_host we should
        # run the tests on. If setup.py does not exist, copy
        # setupSample.py to setup.py:
        config_info = ConfigInfo()
        test_host = config_info.test_default_host
        user = config_info.test_default_user
        cls.canvas_pwd_file = config_info.canvas_pwd_file

        # Access to common functionality:
        cls.utils = Utilities()

        if test_host != 'localhost':
            # Not on localhost: ensure there is a unittest db for
            # us to work in. We'll delete it later:
            db = MySQLDB(host=test_host,
                         user=config_info.test_default_user,
                         passwd=cls.utils.get_db_pwd(test_host,
                                                     unittests=True))
            try:
                db_name = UnittestDbFinder(db).db_name
            except Exception as e:
                raise AssertionError(
                    f"Cannot open db to find a unittest db: {repr(e)}")
            finally:
                db.close()
        else:
            # On localhost we expect a db 'Unittest':
            db_name = 'Unittest'

        CanvasUtilsTests.test_host = test_host
        CanvasUtilsTests.unittests_db_nm = db_name
        CanvasUtilsTests.user = user
 def testAddAnonToActivityTable(self):
     """Run AnonAndModIDAdder on the test db and check table ActivityGrade.

     The first three rows returned for the columns in
     self.allColNames are compared, by position, against the
     expected tuples. self.db is closed at the end in any case.
     """
     try:
         # Modify the fake courseware_studentmodule excerpt
         # to add anon_screen_name, compute plusses/minusses,
         # compute grade percentage, etc:
         AnonAndModIDAdder('unittest', '', db='unittest', testing=True)
         self.db = MySQLDB(user='******', passwd='', db='unittest')

         # Expected content of result rows 0, 1, and 2:
         expectedRows = [
             (0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1,
              datetime.datetime(2014, 1, 10, 4, 10, 45),
              datetime.datetime(2014, 2, 10, 10, 14, 40),
              'modtype1', 'abc', 'Guided Walkthrough',
              'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'),
             (1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1,
              datetime.datetime(2014, 1, 10, 11, 30, 23),
              datetime.datetime(2014, 2, 10, 14, 30, 12),
              'modtype2', 'def', 'Evaluation',
              'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'),
             (2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1,
              datetime.datetime(2014, 1, 10, 18, 34, 12),
              datetime.datetime(2014, 2, 10, 19, 10, 33),
              'modtype2', 'None', 'Introduction',
              'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'),
         ]

         resultRows = self.db.query('SELECT %s FROM ActivityGrade;' % self.allColNames)
         for rowNum, row in enumerate(resultRows):
             # Rows beyond the third are not checked:
             if rowNum < len(expectedRows):
                 self.assertEqual(expectedRows[rowNum], row)
     finally:
         self.db.close()
Esempio n. 29
0
 def __init__(self, user, pwd):
     '''
     Log into MySQL database 'Edx', drop table 'UserCountry',
     and create the (empty) table named by
     UserDetailedLocationTableCreator.DEST_TABLE.

     @param user: MySQL user under which to log in
     @type user: String
     @param pwd: MySQL password for that user
     @type pwd: String
     '''
     self.ipCountryXlater = IpCountryDict()
     self.user = user
     self.pwd = pwd
     self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
     destTable = UserDetailedLocationTableCreator.DEST_TABLE
     createCmd = '''CREATE TABLE %s (
                      anon_screen_name varchar(40) NOT NULL DEFAULT "",
                      two_letter_country varchar(2) NOT NULL DEFAULT "",
                      three_letter_country varchar(3) NOT NULL DEFAULT "",
                      country varchar(255) NOT NULL DEFAULT "",
                      region varchar(255) NOT NULL DEFAULT "",
                      city varchar(255) NOT NULL DEFAULT "",
                      lat_long point NOT NULL
                      ) ENGINE=MyISAM;
                      ''' % destTable
     # NOTE(review): the table dropped here is hard-coded as
     # 'UserCountry', while the table created below is DEST_TABLE.
     # If the two names differ and DEST_TABLE already exists, the
     # CREATE TABLE will fail -- confirm which table is intended.
     self.db.dropTable('UserCountry')
     # The original format string was "%...", which is not a valid
     # conversion and raised ValueError before the table was created:
     print("Creating table %s..." % destTable)
     self.db.execute(createCmd)
     print("Done creating table %s." % destTable)
    def setUpClass(cls):
        '''
        Class-level fixture: after the superclass setup, read
        the test host and test user from ConfigInfo, and open
        a MySQLDB connection to database 'information_schema'
        on that host. Results are left in cls.test_host,
        cls.user, and cls.db.
        '''
        super(FindUnittestDbTester, cls).setUpClass()

        # The configuration determines whether tests run
        # on localhost, or on a remote host:
        test_config = ConfigInfo()
        cls.test_host = test_config.test_default_host
        cls.user = test_config.test_default_user

        # Look up the unittest password for that host:
        db_pwd = Utilities().get_db_pwd(cls.test_host, unittests=True)

        cls.db = MySQLDB(host=cls.test_host,
                         user=cls.user,
                         passwd=db_pwd,
                         db='information_schema')
 def testAddAnonToActivityTable(self):
     '''
     Run AnonAndModIDAdder against database 'unittest', then
     verify rows 0, 1, and 2 of table ActivityGrade. Any
     further rows are read, but not checked.
     '''
     try:
         # Row number ==> tuple expected for that row once the
         # adder has computed percent grade, resource display
         # name, etc. from the fake StudentmoduleExcerpt table:
         expectedByRowNum = {
             0: (0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1,
                 datetime.datetime(2014, 1, 10, 4, 10, 45),
                 datetime.datetime(2014, 2, 10, 10, 14, 40),
                 'modtype1', '', 'Guided Walkthrough',
                 'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'),
             1: (1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1,
                 datetime.datetime(2014, 1, 10, 11, 30, 23),
                 datetime.datetime(2014, 2, 10, 14, 30, 12),
                 'modtype2', '', 'Evaluation',
                 'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'),
             2: (2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1,
                 datetime.datetime(2014, 1, 10, 18, 34, 12),
                 datetime.datetime(2014, 2, 10, 19, 10, 33),
                 'modtype2', '', 'Introduction',
                 'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'),
         }

         # Constructing the adder performs the table transformation:
         AnonAndModIDAdder('unittest', '', db='unittest')
         self.db = MySQLDB(user='******', passwd='', db='unittest')
         rowIter = self.db.query('SELECT %s FROM ActivityGrade;' % self.allColNames)
         for rowNum, row in enumerate(rowIter):
             if rowNum in expectedByRowNum:
                 self.assertEqual(expectedByRowNum[rowNum], row)
     finally:
         self.db.close()
Esempio n. 32
0
 def setUp(self):
     '''
     Per-test fixture: reset the MongoDB test collection,
     open the MySQL unittest database, drop table 'contents',
     reset the user list table, and create three
     EdxForumScrubber instances with different
     anonymization settings.
     '''
     # The little MongoDB gets filled with test JSON lines:
     self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum")
     self.resetMongoTestDb()

     # Each test starts with an empty MySQL result table:
     self.mysqldb = MySQLDB(user='******', db='unittest')
     self.mysqldb.dropTable('contents')
     # Fake UserGrade table with records of course participants:
     self.resetMySQLUserListDb()

     # Keyword arguments shared by all three scrubbers:
     sharedKwargs = {'mysqlDbObj': self.mysqldb,
                     'forumTableName': 'contents',
                     'allUsersTableName': 'unittest.UserGrade'}

     # The scrubbers are instantiated with None in place of
     # the name of a bson file that contains forum records.
     # That None makes the class understand that it is
     # being instantiated for a unit test.
     self.forumScrubberAnonymized = EdxForumScrubber(None, **sharedKwargs)
     self.forumScrubberRelatable = EdxForumScrubber(None, allowAnonScreenName=True, **sharedKwargs)
     self.forumScrubberClear = EdxForumScrubber(None, anonymize=False, **sharedKwargs)
    def __init__(self, topic=None, user='******', passwd=''):
        '''
        Open a connection to MySQL database 'Edx' on 127.0.0.1,
        subscribe to the given bus topic, and then wait for
        messages forever. NOTE: this constructor never returns.

        @param topic: bus topic to subscribe to. If None,
            CoursesGivenQuarter.module_topic is used.
        @type topic: {String | None}
        @param user: MySQL user under which to log in
        @type user: String
        @param passwd: MySQL password for that user
        @type passwd: String
        '''
        if topic is None:
            topic = CoursesGivenQuarter.module_topic

        self.mysqldb = MySQLDB(host='127.0.0.1',
                               port=CoursesGivenQuarter.MYSQL_PORT_LOCAL,
                               user=user,
                               passwd=passwd,
                               db='Edx')

        # Wrap the callback method in a callable object that
        # is handed to the bus. Method requestCoursesForQuarter()
        # is already bound to self; the partial adds no
        # further arguments:
        self.requestDeliveryMethod = functools.partial(self.requestCoursesForQuarter)

        # Create a BusAdapter instance:
        self.bus = BusAdapter()

        # Tell the bus that we are interested in the topic,
        # and want callbacks to self.requestDeliveryMethod
        # whenever a message arrives:
        self.bus.subscribeToTopic(topic, self.requestDeliveryMethod)

        # Wait for messages on the topic we subscribed to.
        # (Previously the wait was always on module_topic,
        # even when the caller had passed a different topic.)
        while True:
            self.bus.waitForMessage(topic)
Esempio n. 34
0
    def get_mysql_version(cls):
        '''
        Return a tuple: (major, minor).
        Example, for MySQL 5.7.15, return (5,7).
        Return (None,None) if version number not found.

        The version is obtained by running the mysql client
        program with option --version, and picking the first
        '<digits>.<digits>.' sequence out of its output.

        @return: major and minor version numbers, or (None,None)
        @rtype: ({int | None}, {int | None})
        '''

        # Where is mysql client program?
        mysql_path = MySQLDB.find_mysql_path()

        # Get version string, which looks like this:
        #   'Distrib 5.7.15, for osx10.11 (x86_64) using  EditLine wrapper\n'
        version_str = subprocess.check_output([mysql_path, '--version']).decode('utf-8')

        # Isolate the major and minor version numbers (e.g. '5', and '7').
        # Require at least one digit in each group: with '*' the
        # pattern also matched digit-less text such as '..', and
        # the int() conversions below then raised ValueError
        # instead of (None,None) being returned:
        match_obj = re.search(r'([0-9]+)[.]([0-9]+)[.]', version_str)
        if match_obj is None:
            return (None, None)
        (major, minor) = match_obj.groups()
        return (int(major), int(minor))
Esempio n. 35
0
 def tearDownClass(cls):
     '''
     Class-level cleanup. After the superclass teardown,
     nothing more is done if the test host is 'localhost'.
     Otherwise database cls.db_name is dropped via a
     temporary connection to information_schema on the
     test host. That connection is closed even if the
     drop fails.
     '''
     super(AuxTableCopyTester, cls).tearDownClass()
     if cls.test_host == 'localhost':
         return

     db = None
     try:
         # Remove the unittest db we created:
         print(f"Removing database '{cls.db_name}'...")
         db = MySQLDB(user=cls.user,
                      passwd=cls.mysql_pwd,
                      db='information_schema',
                      host=cls.test_host)
         db.execute(f"DROP DATABASE {cls.db_name}")
         # Single print: the original nested print(print(...))
         # additionally wrote a stray 'None' line:
         print(f"Done removing database '{cls.db_name}'...")
     finally:
         if db is not None:
             db.close()
Esempio n. 36
0
 def log_into_mysql(cls, user, db_pwd, db=None):
     '''
     Open a MySQLDB connection to host AuxTableCopyTester.test_host.
     If the login fails with a ValueError whose text contains
     "OperationalError(1049,", log in again without naming a
     database, and create database 'unittest'.

     @param user: MySQL user under which to log in
     @type user: String
     @param db_pwd: MySQL password for that user
     @type db_pwd: String
     @param db: name of database to connect to
     @type db: {String | None}
     @return: the open connection
     @rtype: MySQLDB
     @raise RuntimeError: if the first login fails for any
         other reason.
     '''
     host = AuxTableCopyTester.test_host
     try:
         # Try logging in, specifying the database in which
         # all the tables will be created:
         return MySQLDB(user=user, passwd=db_pwd, db=db, host=host)
     except ValueError as e:
         # Anything other than a not-yet-existing database is fatal:
         if "OperationalError(1049," not in str(e):
             raise RuntimeError("Cannot open Canvas database: %s" % repr(e))
         # Log in without specifying a db to 'use',
         # and create the db:
         connection = MySQLDB(user=user, passwd=db_pwd, host=host)
         connection.execute('CREATE DATABASE %s;' % 'unittest')
         return connection
     except Exception as e:
         raise RuntimeError("Cannot open Canvas database: %s" % repr(e))
class TestAddAnonToActivityGrade(unittest.TestCase):
    '''
    Tests AnonAndModIDAdder: a small StudentmoduleExcerpt
    table is created in MySQL database 'unittest', the adder
    is run, and the resulting ActivityGrade rows are checked.
    '''

    # Column name ==> MySQL type; used as schema for both the
    # StudentmoduleExcerpt and the ActivityGrade tables:
    studentmoduleExcerptSchema = OrderedDict({
        'activity_grade_id': 'INT',
        'student_id': 'INT',
        'course_display_name': 'VARCHAR(255)',
        'grade': 'VARCHAR(5)',
        'max_grade': 'DOUBLE',
        'percent_grade': 'DOUBLE',
        'parts_correctness': 'VARCHAR(255)',
        'answers': 'VARCHAR(255)',
        'num_attempts': 'INT',
        'first_submit': 'DATETIME',
        'last_submit': 'DATETIME',
        'module_type': 'VARCHAR(255)',
        'anon_screen_name': 'VARCHAR(40)',
        'resource_display_name': 'VARCHAR(255)',
        'module_id': 'VARCHAR(255)',
    })

    # Column names in table order:
    studentmoduleExcerptColNames = [
        'activity_grade_id', 'student_id', 'course_display_name',
        'grade', 'max_grade', 'percent_grade',
        'parts_correctness', 'answers', 'num_attempts',
        'first_submit', 'last_submit', 'module_type',
        'anon_screen_name', 'resource_display_name', 'module_id',
    ]

    # Values for the 'state' JSON that the test rows carry
    # in their parts_correctness column:
    state1 = ' {"correct_map": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {"hint": "", "hintmode": null, "correctness": "correct", "npoints": null, "msg": "", "queuestate": null}}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}, "attempts": 1, "seed": 1, "done": true, "student_answers": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": "choice_1"}} '
    state2 = '{"correct_map": {}, "seed": 1, "student_answers": {}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}}'
    state3 = '{"position": 1}'

    # Module ids of the three test rows:
    modid1 = 'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'
    modid2 = 'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'
    modid3 = 'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'

    # The three rows loaded into StudentmoduleExcerpt:
    studentmoduleExcerptValues = [
        [0, 1, 'myCourse', 3, 10, -1.0, state1, '', -1,
         '2014-01-10 04:10:45', '2014-02-10 10:14:40', 'modtype1', '', '', modid1],
        [1, 2, 'myCourse', 5, 10, -1.0, state2, '', -1,
         '2014-01-10 11:30:23', '2014-02-10 14:30:12', 'modtype2', '', '', modid2],
        [2, 3, 'yourCourse', 8, 10, -1.0, state3, '', -1,
         '2014-01-10 18:34:12', '2014-02-10 19:10:33', 'modtype2', '', '', modid3],
    ]

    def setUp(self):
        '''
        Re-create and fill table StudentmoduleExcerpt, and make
        sure an empty table ActivityGrade exists. Also builds
        the comma-separated column list in self.allColNames.
        '''
        colNames = TestAddAnonToActivityGrade.studentmoduleExcerptColNames
        schema = TestAddAnonToActivityGrade.studentmoduleExcerptSchema
        self.allColNames = ','.join(colNames)

        self.db = MySQLDB(user='******', passwd='', db='unittest')
        self.db.dropTable('StudentmoduleExcerpt')
        # The source table is created as a regular table,
        # not as a temporary one:
        self.db.createTable('StudentmoduleExcerpt', schema, temporary=False)
        self.db.bulkInsert('StudentmoduleExcerpt',
                           colNames,
                           TestAddAnonToActivityGrade.studentmoduleExcerptValues)
        self.db.createTable('ActivityGrade', schema)
        # Make sure there isn't left over content (if the table existed):
        self.db.truncateTable('ActivityGrade')
        self.db.close()

    def tearDown(self):
        '''
        Open, and immediately close a connection to the unittest
        database. The test tables are deliberately left in
        place: dropping them here hangs.
        '''
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        self.db.close()

    def testAddAnonToActivityTable(self):
        '''
        Run AnonAndModIDAdder on database 'unittest', and check
        the first three rows of the resulting ActivityGrade
        table. Rows beyond the third are read, but not checked.
        '''
        try:
            # Expected rows, after the adder has turned the fake
            # courseware_studentmodule excerpt into ActivityGrade
            # rows with grade percentage, display name, etc.:
            expected = (
                (0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1,
                 datetime.datetime(2014, 1, 10, 4, 10, 45),
                 datetime.datetime(2014, 2, 10, 10, 14, 40),
                 'modtype1', '', 'Guided Walkthrough',
                 'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'),
                (1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1,
                 datetime.datetime(2014, 1, 10, 11, 30, 23),
                 datetime.datetime(2014, 2, 10, 14, 30, 12),
                 'modtype2', '', 'Evaluation',
                 'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'),
                (2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1,
                 datetime.datetime(2014, 1, 10, 18, 34, 12),
                 datetime.datetime(2014, 2, 10, 19, 10, 33),
                 'modtype2', '', 'Introduction',
                 'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'),
            )

            # The adder does its work in the constructor:
            AnonAndModIDAdder('unittest', '', db='unittest')
            self.db = MySQLDB(user='******', passwd='', db='unittest')
            selectCmd = 'SELECT %s FROM ActivityGrade;' % self.allColNames
            for rowNum, row in enumerate(self.db.query(selectCmd)):
                if rowNum >= len(expected):
                    continue
                self.assertEqual(expected[rowNum], row)
        finally:
            self.db.close()
Esempio n. 38
0
    def setUpClass(cls):
        '''
        Check once, before any test runs, that the local MySQL
        server is usable for the tests, and determine the
        MySQL version to test against.

        Outcome is recorded on class TestPymysqlUtils:
          - env_ok:  False if user 'unittest' cannot log in, or
                     lacks a needed grant.
          - err_msg: explanation and remedy if env_ok is False.
          - major, minor: MySQL version; falls back to 5, 7 if
                     the version is unknown or not recognized.

        NOTE: if a grant is found missing, the method returns
        early, and major/minor are not set.

        @raise OSError: if the MySQL version lookup itself fails.
        '''
        # Ensure that a user unittest with the proper
        # permissions exists in the db:
        TestPymysqlUtils.env_ok = True
        TestPymysqlUtils.err_msg = ''
        needed_grants = ['SELECT', 'INSERT', 'UPDATE',
                         'DELETE', 'CREATE', 'CREATE TEMPORARY TABLES',
                         'DROP', 'ALTER']
        grant_list = ','.join(needed_grants)
        mysqldb = None
        try:
            mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
            grant_query = 'SHOW GRANTS FOR unittest@localhost'
            query_it = mysqldb.query(grant_query)
            # First row of the SHOW GRANTS response should be
            # one of:
            first_grants = ["GRANT USAGE ON *.* TO 'unittest'@'localhost'",
                            "GRANT USAGE ON *.* TO `unittest`@`localhost`"
                            ]
            # Second row depends on the order in which the 
            # grants were provided. The row will look something
            # like:
            #   GRANT SELECT, INSERT, UPDATE, DELETE, ..., CREATE, DROP, ALTER ON `unittest`.* TO 'unittest'@'localhost'
            # Verify:
            usage_grant = query_it.next()
            if usage_grant not in first_grants:
                TestPymysqlUtils.err_msg = '''
                    User 'unittest' is missing USAGE grant needed to run the tests.
                    Also need this in your MySQL: 
                    
                          %s
                    ''' % ('GRANT %s ON unittest.* TO unittest@localhost' % grant_list)
                TestPymysqlUtils.env_ok = False
                return
            grants_str = query_it.next()
            for needed_grant in needed_grants:
                if grants_str.find(needed_grant) == -1:
                    TestPymysqlUtils.err_msg = '''
                    User 'unittest' does not have the '%s' permission needed to run the tests.
                    Need this in your MySQL:
                    
                        %s
                    ''' % (needed_grant, 'GRANT %s ON unittest.* TO unittest@localhost;' % grant_list)
                    TestPymysqlUtils.env_ok = False
                    return  
        except (ValueError,RuntimeError):
            TestPymysqlUtils.err_msg = '''
               For unit testing, localhost MySQL server must have 
               user 'unittest' without password, and a database 
               called 'unittest'. To create these prerequisites 
               in MySQL:
               
                    CREATE USER unittest@localhost;
                    CREATE DATABASE unittest; 
               This user needs permissions:
                    %s 
               ''' % ('GRANT %s ON unittest.* TO unittest@localhost;' % grant_list)
            TestPymysqlUtils.env_ok = False
        finally:
            # The connection is only needed for the grants
            # check; don't leave it open. This also runs
            # on the early returns above:
            if mysqldb is not None:
                mysqldb.close()

        # Check MySQL version:
        try:
            (major, minor) = TestPymysqlUtils.get_mysql_version()
        except Exception as e:
            raise OSError('Could not get mysql version number: %s' % str(e)) from e

        if major is None:
            print('Warning: MySQL version number not found; testing as if V5.7')
            TestPymysqlUtils.major = 5
            TestPymysqlUtils.minor = 7
        else:
            TestPymysqlUtils.major = major
            TestPymysqlUtils.minor = minor
            known_versions = [(5,6), (5,7), (8,0)]
            if (major,minor) not in known_versions:
                # The version numbers were previously never
                # filled into this message:
                print('Warning: MySQL version is %s.%s; but testing as if V5.7' % (major, minor))
                TestPymysqlUtils.major = 5
                TestPymysqlUtils.minor = 7
Esempio n. 39
0
class AnonAndModIDAdder(object):
    '''
    Fills table ActivityGrade from the rows of table
    StudentmoduleExcerpt. For each source row the following
    columns are computed:

      - resource_display_name: resolved from the module_id
      - anon_screen_name: looked up via the row's student_id
      - percent_grade: grade as percentage of max_grade
      - parts_correctness, answers, num_attempts: extracted
        from the 'state' JSON that the source row carries in
        its parts_correctness column.

    All work is done by the constructor.
    '''

    # Number of rows to process in memory
    # before writing to ActivityGrade:
    BATCH_SIZE = 10000
    
    # For explanation of the following regex patterns,
    # see header comment of parseStateJSON:
    SOLUTION_RESULT_PATTERN  = re.compile(r'[^"]*correctness": "([^"]*)')
    SOLUTION_ANSWERS_PATTERN = re.compile(r'[^:]*: "([^"]*)"')
    
    ACTIVITY_GRADE_COL_NAMES = [
                'activity_grade_id',
                'student_id',
                'course_display_name',
                'grade',
                'max_grade',
                'percent_grade',
                'parts_correctness',
                'answers',
                'num_attempts',
                'first_submit',
                'last_submit',
                'module_type',
                'anon_screen_name',
                'resource_display_name',
                'module_id'
                ]
    
    # Indices into tuples from StudentmoduleExcerpt:
    STUDENT_INT_ID_INDEX = 1
    GRADE_INDEX = 3
    MAX_GRADE_INDEX = 4
    PERCENT_GRADE_INDEX = 5
    PARTS_CORRECTNESS_INDEX = 6
    ANSWERS_INDEX = 7
    NUM_ATTEMPTS_INDEX = 8
    ANON_SCREEN_NAME_INDEX = 12
    RESOURCE_DISPLAY_NAME_INDEX = 13
    MODULE_ID_INDEX = 14
    
    
    def __init__(self, uid, pwd, db='Edx', testing=False):
        '''
        Make connection to MySQL wrapper, build the cache that
        maps student ids to anon_screen_names, and then process
        all rows of StudentmoduleExcerpt into ActivityGrade.

        @param uid: MySQL user under which to log in. Assumed to be other than None
        @type uid: String
        @param pwd: MySQL password for user uid. May be None.
        @type pwd: {String | None}
        @param db: database to connect to. If this is 'unittest',
            source rows are read from unittest.StudentmoduleExcerpt.
        @type db: String
        @param testing: if True, the anon_screen_name lookup uses
            tables in database unittest.
        @type testing: boolean
        '''
        self.db = db
        if pwd is None:
            self.mysqldbStudModule = MySQLDB(user=uid, db=db)
        else:
            self.mysqldbStudModule = MySQLDB(user=uid, passwd=pwd, db=db)
        # Create a string with the parameters of the SELECT call,
        # (activity_grade_id,student_id,...):
        self.colSpec = ','.join(AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES)
    
        self.cacheIdInt2Anon(testing)
        self.pullRowByRow()

    def cacheIdInt2Anon(self, testing=False):
        '''
        Builds a dict to map platform integers to anon_screen_names.
        The result is stored in self.int2AnonCache.

        @param testing: If set true, then all tables are assumed to be in MySQL DB unittest.
        @type testing: boolean
        '''
        self.int2AnonCache = {}
        if testing:
            queryIt = self.mysqldbStudModule.query("SELECT student_id AS user_int_id, \
                                                           unittest.UserGrade.anon_screen_name \
                                                      FROM unittest.StudentmoduleExcerpt LEFT JOIN unittest.UserGrade \
                                                        ON unittest.StudentmoduleExcerpt.student_id = unittest.UserGrade.user_int_id;")
        else:
            queryIt = self.mysqldbStudModule.query("SELECT student_id AS user_int_id, \
                                                           EdxPrivate.UserGrade.anon_screen_name \
                                                      FROM edxprod.StudentmoduleExcerpt LEFT JOIN EdxPrivate.UserGrade \
                                                        ON edxprod.StudentmoduleExcerpt.student_id = EdxPrivate.UserGrade.user_int_id;")
        for user_int_id, anon_screen_name in queryIt:
            self.int2AnonCache[user_int_id] = anon_screen_name

    def pullRowByRow(self):
        '''
        Read StudentmoduleExcerpt row by row, compute the derived
        columns of each row, and insert the results into table
        ActivityGrade in batches of at most BATCH_SIZE rows.
        '''
        rowBatch = []
        theQuery = "SELECT activity_grade_id,student_id,\
                    	   course_display_name,grade,max_grade,percent_grade,\
                    	   parts_correctness,answers,num_attempts,first_submit,\
                    	   last_submit,module_type,anon_screen_name,\
                    	   resource_display_name,module_id \
                    FROM edxprod.StudentmoduleExcerpt \
                    WHERE isTrueCourseName(course_display_name) = 1;"
        if self.db == 'unittest':
            queryIt = self.mysqldbStudModule.query("SELECT %s FROM unittest.StudentmoduleExcerpt;" % self.colSpec)
        else:
            queryIt = self.mysqldbStudModule.query(theQuery)
        for studmodTuple in queryIt:
            # Results return as tuples, but we need to change tuple items by index.
            # So must convert to list:
            studmodTuple = list(studmodTuple)
            # Resolve the module_id into a human readable resource_display_name:
            moduleID = studmodTuple[AnonAndModIDAdder.MODULE_ID_INDEX]
            studmodTuple[AnonAndModIDAdder.RESOURCE_DISPLAY_NAME_INDEX] = self.getResourceDisplayName(moduleID)
            
            # Compute the anon_screen_name. A student id that is
            # missing from the cache raises KeyError (not TypeError);
            # such rows get an empty anon_screen_name instead of
            # aborting the whole run:
            studentIntId = studmodTuple[AnonAndModIDAdder.STUDENT_INT_ID_INDEX]
            try:
                anonScreenName = self.int2AnonCache[studentIntId]
            except (KeyError, TypeError):
                anonScreenName = ''
            studmodTuple[AnonAndModIDAdder.ANON_SCREEN_NAME_INDEX] = anonScreenName

            # Pick grade and max_grade out of the row,
            # compute the percentage, and place that 
            # back into the row in the percent_grade column.
            # Grades that are missing, non-numeric, or have 
            # a max_grade of zero yield 'NULL':
            grade = studmodTuple[AnonAndModIDAdder.GRADE_INDEX]
            max_grade = studmodTuple[AnonAndModIDAdder.MAX_GRADE_INDEX]
            try:
                percent_grade = round((int(grade) * 100.0/ int(max_grade)), 2)
            except (TypeError, ValueError, ZeroDivisionError, OverflowError):
                percent_grade = 'NULL'
            studmodTuple[AnonAndModIDAdder.PERCENT_GRADE_INDEX] = str(percent_grade)

            # Parse 'state' column from JSON and put result into plusses/minusses column:
            (partsCorrectness, answers, numAttempts) = \
                self.parseStateJSON(studmodTuple[AnonAndModIDAdder.PARTS_CORRECTNESS_INDEX])
            
            studmodTuple[AnonAndModIDAdder.PARTS_CORRECTNESS_INDEX] = partsCorrectness
            studmodTuple[AnonAndModIDAdder.ANSWERS_INDEX] = ','.join(answers)
            studmodTuple[AnonAndModIDAdder.NUM_ATTEMPTS_INDEX] = numAttempts
            
            rowBatch.append(studmodTuple)
            if len(rowBatch) >= AnonAndModIDAdder.BATCH_SIZE:
                self.mysqldbStudModule.bulkInsert('ActivityGrade', AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES, rowBatch)
                rowBatch = []
        # Write the final, partially filled batch:
        if rowBatch:
            self.mysqldbStudModule.bulkInsert('ActivityGrade', AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES, rowBatch)
    
    def getResourceDisplayName(self, moduleID):
        '''
        Return the module name that Utils.getModuleNameFromID()
        provides for the given module id.

        @param moduleID: module id as found in column module_id
        @type moduleID: String
        @return: whatever Utils.getModuleNameFromID() returns
        '''
        return Utils.getModuleNameFromID(moduleID)


    def parseStateJSON(self, jsonStateStr, srcTableName='courseware_studentmodule'):
        '''
        Given the 'state' column from a courseware_studentmodule
        column, return a 3-tuple: (plusMinusStr, answersArray, numAttempts)
        The plusMinusStr will be a string of '+' and '-'. A
        plus means that the problem solution part of an assignment
        submission was correct; a '-' means it was incorrect. The
        plus/minus indicators are arranged in the order of the problem
        subparts; like '++-' for a three-part problem in which the student
        got the first two correct, the last one incorrect.
        
        The answersArray will be an array of answers to the corresponding
        problems, like ['choice_0', 'choice_1'].
        
        Input for a problem solution with two parts looks like this::

            {
              "correct_map": {
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": {
                  "hint": "",
                  "hintmode": null,
                  "correctness": "correct",
                  "npoints": null,
                  "msg": "",
                  "queuestate": null
                },
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": {
                  "hint": "",
                  "hintmode": null,
                  "correctness": "correct",
                  "npoints": null,
                  "msg": "",
                  "queuestate": null
                }
              },
              "input_state": {
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": {
                },
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": {
                }
              },
              "attempts": 3,
              "seed": 1,
              "done": true,
              "student_answers": {
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": "choice_3",
                "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": "choice_0"
              }
            }
        
        This structure is ugly enough even when imported into a dict
        via json.loads() that a regular expression solution is faster.
        Two regexp are used:
          - SOLUTION_RESULT_PATTERN  = re.compile(r'[^"]*correctness": "([^"]*)')
              looks for the correctness entries: 'correct', 'incorrect'.
              First the regex throws away front parts of the JSON that do
              not consist of 'correctness": '. That's the
              '[^"]*correctness": "' part of the regex.
              Next, a capture group grabs all letters that are not a double
              quote. That's the '([^"]*)' part of the regex. Those capture
              groups will contain the words 'correct' or 'incorrect'.
               
          - SOLUTION_ANSWERS_PATTERN = re.compile(r'[^:]*: "([^"]*)"')
              looks for the answers themselves: 'choice_0', etc. This pattern
              assumes that we first cut off from the JSON all the front part up
              to 'student_answers":'. The regex operates over the rest:
              The '[^:]*: "' skips over all text up to the next colon, followed
              by a space and opening double quote. The capture group grabs the 
              answer, as in 'choice_0'. 
        
        @param jsonStateStr: content of the 'state' column. None
            (a NULL column value) is treated like a state without
            problem results.
        @type jsonStateStr: {String | None}
        @param srcTableName: name of the table the state came from.
            Currently not used.
        @type srcTableName: String
        @return: plus/minus string, array of participant's answers, number of attempts. 
               If number of attempts is -1 the row was not a problem statement,
               or number of attempts was otherwise unavailable.
        @rtype: (string, [string], int)
        '''
        successResults = ''
        # The following badAnswers array is filled with
        # just the wrong answers. It's maintained, but
        # not currently returned, b/c users didn't feel
        # they needed it.
        badAnswers = [] 
        answers = []
        numAttempts = -1
        
        # Many state entries are not student problem result 
        # submissions, but of the form "{'position': 4}".
        # Weed those out, together with NULL states, which
        # would otherwise raise AttributeError on find():
        if jsonStateStr is None or jsonStateStr.find('correct_map') == -1:
            return (successResults, answers, numAttempts)
        
        # Get the ['correct','incorrect',...] array;
        # we'll use it later on:
        allSolutionResults = AnonAndModIDAdder.SOLUTION_RESULT_PATTERN.findall(jsonStateStr)
        
        # Next, get all the answers themselves.
        # Chop off all the JSON up to 'student_answers":':
        chopTxtMarker = 'student_answers":'
        chopPos = jsonStateStr.find(chopTxtMarker)
        if chopPos == -1:
            # Couldn't find the student answers; fine;
            return (successResults, answers, numAttempts)

        # Get left with str starting at '{' in
        # "student_answers": {
        #   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": "choice_3",
        #   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": "choice_0"
        restJSON = jsonStateStr[chopPos+len(chopTxtMarker):]
        # ... and put the regex to work:
        answers = AnonAndModIDAdder.SOLUTION_ANSWERS_PATTERN.findall(restJSON)
        
        # Find number of attempts:
        # Find '"attempts": 3,...':
        chopTxtMarker = '"attempts": '
        chopPos = jsonStateStr.find(chopTxtMarker)
        if chopPos != -1:
            upToNum = jsonStateStr[chopPos+len(chopTxtMarker):]
            try:
                # Collect the leading run of digits. The 'str' part
                # of 'str(upToNum)' guards against the JSON not 
                # being a plain str:
                numAttempts = int("".join(itertools.takewhile(str.isdigit, str(upToNum))))
            except (ValueError, TypeError):
                # Couldn't find the number of attempts,
                # or garbage that is clearly not a digit.
                # Just punt.
                pass
                
        # Go through the ['correct','incorrect',...] array,
        # and take two actions: if correct, add a '+' to
        # the successResults str; if 'incorrect' then add
        # a '-' to successResults, and transfer the 'bad'
        # answer to the badAnswers array:
        for (i, correctness) in enumerate(allSolutionResults):
            if correctness == 'correct':
                successResults += '+'
            else:
                successResults += '-'
                try:
                    badAnswers.append(answers[i])
                except IndexError:
                    badAnswers.append('<unknown>')

        return (successResults, answers, numAttempts)
Esempio n. 40
0
class EdxForumScrubber(object):
    '''
    Transfers OpenEdX Forum posts from a .bson dump into a MySQL table,
    anonymizing them on the way if requested.

    Given a .bson file of OpenEdX Forum posts, load the file
    into a MongoDB. Then pull a post at a time, anonymize, and
    insert a selection of fields into a MySQL db. The MongoDb
    entries look like this::

    {
        "_id" : ObjectId("51b75a48f359c40a00000028"),
        "_type" : "Comment",
        "abuse_flaggers" : [ ],
        "anonymous" : false,
        "anonymous_to_peers" : false,
        "at_position_list" : [ ],
        "author_id" : "26344",
        "author_username" : "Minelly48",
        "body" : "I am Gwen.I am a nursing professor who took statistics many years ago and want to refresh my knowledge.",
        "comment_thread_id" : ObjectId("51b754e5f359c40a0000001d"),
        "course_id" : "Medicine/HRP258/Statistics_in_Medicine",
        "created_at" : ISODate("2013-06-11T17:11:36.831Z"),
        "endorsed" : false,
        "historical_abuse_flaggers" : [ ],
        "parent_ids" : [ ],
        "updated_at" : ISODate("2013-06-11T17:11:36.831Z"),
        "visible" : true,
        "votes" : {
            "count" : 2,
            "down" : [ ],
            "down_count" : 0,
            "point" : 2,
            "up" : [
                "40325",
                "20323"
            ],
            "up_count" : 2
        },
        "sk" : "51b75a48f359c40a00000028"
    }

    Depending on parameter allowAnonScreenName in the __init__() method,
    forum entries in the relational database will be associated with the
    same hash that is used to anonymize other parts of the OpenEdX data.

    NOTE: several helpers called below (getMySQLPasswd(), setupLogging(),
    populateUserCache(), createForumTable(), ensureSchemaAdherence(),
    insert_content_record(), logInfo(), logDebug()) are not part of this
    excerpt; they are expected to be defined elsewhere in the class.
    '''

    LOG_DIR = '/home/dataman/Data/EdX/NonTransformLogs'

    # Pattern for email id - strings of alphabets/numbers/dots/hyphens followed
    # by an @ followed by combinations of dot/. followed by the edu/com
    # also, allow for spaces.
    # Written as a raw string so that the regex escapes (\s, \(, \., \-) are
    # not subject to Python's own string-escape processing; the resulting
    # pattern text is unchanged.
    emailPattern = r'(.*)\s+([a-zA-Z0-9\(\.\-]+)[@]([a-zA-Z0-9\.]+)(.)(edu|com)\s*(.*)'
    compiledEmailPattern = re.compile(emailPattern)

    # Pattern for replacing embedded double quotes in post bodies.
    # Matches a double quote together with any run of backslashes
    # (zero or more) that immediately precedes it, so that quotes at
    # the very start of a string are matched as well:
    doublQuoteReplPattern = re.compile(r'[\\]{0,}"')

    # Schema of EdxForum.contents: an ordered dict that is
    # used twice: the table creation MySQL command is constructed
    # from this dict, and the dict is used to ensure that
    # all its keys (i.e. future column names) are present
    # in each MongoDB object. See also createForumTable().
    # In createForumTable() either entry anon_screen_name,
    # or screen_name in the dict below will be deleted, based
    # on whether we are asked to anonymize or not:

    forumSchema = OrderedDict()

    forumSchema['forum_post_id'] =  "varchar(40) NOT NULL DEFAULT 'unavailable'"
    forumSchema['anon_screen_name'] =  "varchar(40) NOT NULL DEFAULT 'anon_screen_name_redacted'"  # This or next deleted based on anonymize yes/no
    forumSchema['screen_name'] =  "varchar(40) NOT NULL DEFAULT 'anon_screen_name_redacted'"       # This or prev deleted based on anonymize yes/no
    forumSchema['type'] =  "varchar(20) NOT NULL"
    forumSchema['anonymous'] =  "varchar(10) NOT NULL"
    forumSchema['anonymous_to_peers'] =  "varchar(10) NOT NULL"
    forumSchema['at_position_list'] =  "varchar(200) NOT NULL"
    forumSchema['forum_uid'] =  "varchar(40)  NOT NULL"
    forumSchema['body'] = "TEXT NOT NULL" #"varchar(2500) NOT NULL"
    forumSchema['course_display_name'] =  "varchar(100) NOT NULL"
    forumSchema['created_at'] =  "datetime NOT NULL"
    forumSchema['votes'] = "TEXT NOT NULL" # "varchar(200) NOT NULL"
    forumSchema['count'] =  "int(11) NOT NULL"
    forumSchema['down_count'] =  "int(11) NOT NULL"
    forumSchema['up_count'] =  "int(11) NOT NULL"
    forumSchema['up'] =  "varchar(200) DEFAULT NULL"
    forumSchema['down'] =  "varchar(200) DEFAULT NULL"
    forumSchema['comment_thread_id'] =  "varchar(255) DEFAULT NULL"
    forumSchema['parent_id'] =  "varchar(255) DEFAULT NULL"
    forumSchema['parent_ids'] =  "varchar(255) DEFAULT NULL"
    forumSchema['sk'] =  "varchar(255) DEFAULT NULL"
    forumSchema['confusion'] =  "varchar(20) NOT NULL DEFAULT ''"
    forumSchema['happiness'] =  "varchar(20) NOT NULL DEFAULT ''"


    def __init__(self,
                 bsonFileName,
                 mysqlDbObj=None,
                 forumTableName='contents',
                 allUsersTableName='EdxPrivate.UserGrade',
                 anonymize=True,
                 allowAnonScreenName=False):
        '''
        Given a .bson file containing OpenEdX Forum entries, anonymize the entries (if desired),
        and place them into a MySQL table. The constructor only connects, sets up
        logging, and (re)creates the destination table via prepDatabase(); the
        transfer itself is started with runConversion().

        :param bsonFileName: full path to the .bson file. Set to None if instantiating
            for unit testing.
        :type bsonFileName: String
        :param mysqlDbObj: a pymysql_utils.MySQLDB object where anonymized entries are
            to be placed. If None, a new such object is created into MySQL db 'EdxForum'
        :type mysqlDbObj: MySQLDB
        :param forumTableName: name of table into which anonymized Forum entries are to be placed
        :type forumTableName: String
        :param allUsersTableName: fully qualified name of table listing all in-the-clear mySQLUser names
            of users who post to the Forum. Used to redact their names from their own posts.
        :type allUsersTableName: String
        :param anonymize: If true, Forum post entries in the MySQL table will be anonymized
        :type anonymize: bool
        :param allowAnonScreenName: if True, then occurrences of poster's name in
            post bodies are replaced by <redacName_<anon_screen_name>>, where anon_screen_name
            is the hash used in other tables of the OpenEdX data.
        :type allowAnonScreenName: Bool
        '''

        self.bsonFileName = bsonFileName
        self.forumTableName = forumTableName
        self.forumDbName = 'EdxForum'
        self.allUsersTableName = allUsersTableName
        self.anonymize = anonymize
        self.allowAnonScreenName = allowAnonScreenName

        # If not unittest, but regular run, then mysqlDbObj is None
        if mysqlDbObj is None:
            self.mysql_passwd = self.getMySQLPasswd()
            self.mysql_dbhost ='localhost'
            self.mysql_user = getpass.getuser() # mySQLUser that started this process
            self.mydb = MySQLDB(user=self.mysql_user, passwd=self.mysql_passwd, db=self.forumDbName)
        else:
            self.mydb = mysqlDbObj

        # Number of records transferred; reported at the end of runConversion():
        self.counter=0

        self.userCache = {}
        self.userSet   = set()

        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
        self.setupLogging()
        self.prepDatabase()

    def runConversion(self):
        '''
        Do the actual work. We don't call this method from __init__()
        so that unittests can create an EdxForumScrubber instance without
        doing the actual work. Instead, unittests call individual methods.

        Both the MySQL and the MongoDB connection are closed when the
        transfer ends, whether it succeeded or raised.
        '''
        self.populateUserCache()

        self.mongo_database_name = 'TmpForum'
        self.collection_name = 'contents'

        # Load bson file into Mongodb:
        self.loadForumIntoMongoDb(self.bsonFileName)
        self.mongodb = MongoDB(dbName=self.mongo_database_name, collection=self.collection_name)

        try:
            # Anonymize each forum record, and transfer to MySQL db.
            # The destination is the table that prepDatabase() created,
            # i.e. the one configured in __init__():
            self.forumMongoToRelational(self.mongodb, self.mydb, self.forumTableName)
        finally:
            self.mydb.close()
            self.mongodb.close()
        self.logInfo('Entered %d records into %s' % (self.counter, self.forumDbName + '.' + self.forumTableName))

    def loadForumIntoMongoDb(self, bsonFilename):
        '''
        Empty the temporary MongoDB collection, then refill it from the
        given .bson file by running the external 'mongorestore' program.
        Afterwards ask the 'mongo' shell for the number of loaded objects,
        and store that program's raw output in self.numMongoItems.

        Requires self.mongo_database_name and self.collection_name, which
        runConversion() sets, and self.logFilePath.

        :param bsonFilename: path to the .bson file to load
        :type bsonFilename: String
        :raise subprocess.CalledProcessError: if the 'mongo' count command
            exits with a non-zero status. A non-zero status of mongorestore
            is only logged.
        '''

        mongoclient = MongoClient()
        try:
            # Get collection object:
            collection = mongoclient[self.mongo_database_name][self.collection_name]

            # Clear out any old forum entries:
            self.logInfo('Preparing to delete the collection ')
            collection.remove()
            self.logInfo('Deleting mongo collection completed. Will now attempt a mongo restore')
        finally:
            # The client is only needed for clearing the collection:
            mongoclient.close()

        self.logInfo('Spawning subprocess to execute mongo restore')
        # NOTE(review): mode 'w' truncates the file at self.logFilePath;
        # confirm that this is intended if the logger writes to the same file.
        with open(self.logFilePath,'w') as outfile:
            ret = subprocess.call(
                   ['mongorestore',
                    '--drop',
                    '--db', self.mongo_database_name,
                    '--collection', self.collection_name,
                    bsonFilename],
                stdout=outfile, stderr=outfile)

            self.logDebug('Return value from mongorestore is %s' % (ret))

            objCount = subprocess.check_output(
                       ['mongo',
                        '--quiet',
                        '--eval',
                        'printjson(db.contents.count())',
                        self.mongo_database_name,
                        ],
                        stderr=outfile)
            self.numMongoItems = objCount

            self.logInfo('Available Forum posts %s' % objCount)

    def forumMongoToRelational(self, mongodb, mysqlDbObj, mysqlTable):
        '''
        Given a MongoDB wrapper through which Forum posts can be queried,
        and a MySQL db object and table name, anonymize each mongo record,
        and insert it into the MySQL table.

        :param mongodb: object whose query({}) method yields all Forum posts
        :type mongodb: MongoDB
        :param mysqlDbObj: wrapper to MySQL db. See pymysql_utils.py
        :type mysqlDbObj: MYSQLDB
        :param mysqlTable: name of table where posts are to be deposited.
            Example: 'contents'.
        :type mysqlTable: String
        '''

        self.logInfo('Will start inserting from mongo collection to MySQL')

        for mongoForumRec in mongodb.query({}):
            mongoRecordObj = MongoRecord(mongoForumRec)

            try:
                # Check whether 'up' can be converted to a list
                list(mongoRecordObj['up'])
            except Exception as e:
                self.logInfo("Error in conversion of 'up' field to a list (setting cell to -1):" + repr(e))
                mongoRecordObj['up'] ='-1'

            # Make sure the MongoDB object has all fields that will
            # be needed for the forum schema:
            self.ensureSchemaAdherence(mongoRecordObj)

            self.insert_content_record(mysqlDbObj, mysqlTable, mongoRecordObj)

    def prepDatabase(self):
        '''
        Prepare the MySQL side: switch the connection to utf8, then drop
        the forum table and create it anew, empty.

        A ValueError raised while dropping or creating the table is only
        logged. Any MySQLdb.Error is logged, after which the process is
        terminated with exit status 1.
        '''
        try:
            self.logDebug("Setting and assigning char set for mysqld. will truncate old values")
            self.mydb.execute('SET NAMES utf8;')
            self.mydb.execute('SET CHARACTER SET utf8;')
            self.mydb.execute('SET character_set_connection=utf8;')

            # Compose fully qualified table name from the db name to
            # which self.mydb is connected, and the forum table name
            # that was established in __init__():
            fullTblName = self.mydb.dbName() + '.' + self.forumTableName
            # Clear old forum data out of the table:
            try:
                self.mydb.dropTable(fullTblName)
                # Create MySQL table for the posts. createForumTable()
                # decides from the anonymize flag which of the two
                # poster name columns ('screen_name' or 'anon_screen_name')
                # the table will have:
                self.createForumTable(self.anonymize)
                self.logDebug("setting and assigning char set complete. Truncation succeeded")
            except ValueError as e:
                self.logDebug("Failed either to set character codes, or to create forum table %s: %s" % (fullTblName, repr(e)))

        except MySQLdb.Error as e:
            self.logInfo("MySql Error exiting %d: %s" % (e.args[0],e.args[1]))
            sys.exit(1)
Esempio n. 41
0
    def testBadParameters(self):
        '''
        Verify that the MySQLDB constructor rejects connection parameters
        that are None or of the wrong type, and that the resulting error
        message names the offending parameter(s).
        '''
        self.mysqldb.close()

        # Each entry: (constructor keyword args,
        #              text that must occur in the resulting error message)
        badParamCases = [
            # Parameters illegally set to None:
            (dict(host=None, port=3306, user='******', db='unittest'),
             "None value(s) for ['host']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=None, user='******', db='unittest'),
             "None value(s) for ['port']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=3306, user=None, db='unittest'),
             "None value(s) for ['user']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=3306, user='******', db=None),
             "None value(s) for ['db']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=3306, user='******', passwd=None, db='unittest'),
             "None value(s) for ['passwd']; none of host,port,user,passwd or db must be None"),
            (dict(host=None, port=3306, user=None, db=None),
             "None value(s) for ['host', 'db', 'user']; none of host,port,user,passwd or db must be None"),

            # Parameters of illegal type.
            # One illegal type: integer instead of string for host:
            (dict(host=10, port=3306, user='******', db='myDb'),
             "Value(s) ['host'] have bad type;host,user,passwd, and db must be strings; port must be int."),
            # Two illegal types: integers instead of strings for host and user:
            (dict(host=10, port=3306, user=30, db='myDb'),
             "Value(s) ['host', 'user'] have bad type;host,user,passwd, and db must be strings; port must be int."),
            # Port being string instead of required int:
            (dict(host='myHost', port='3306', user='******', db='myDb'),
             "Port must be an integer; was"),
        ]

        for (connArgs, expectedMsg) in badParamCases:
            try:
                MySQLDB(**connArgs)
            except Exception as e:
                # Right kind of failure; now check that the message is
                # the expected one. On mismatch assertIn() reports both
                # the expected and the actual text:
                self.assertIn(expectedMsg, str(e))
            else:
                # Create a better message than a bare 'not raised':
                self.fail('Expected exception "%s" was not raised for parameters %s.' %
                          (expectedMsg, connArgs))
class ExtToAnonTableMaker(object):
    '''
    Admin utility: given a CSV file of external user ids, print to
    stdout a CSV table that maps each external id to the hash
    computed from the respective user's screen name.

    All work is done in the constructor: the ids are loaded into a
    temporary MySQL table, joined against the edxprod user tables,
    exported to a scratch CSV file, and finally hashed and printed.
    '''

    def __init__(self, extIdsFileName):
        '''
        Connect to MySQL db 'Misc' and run the whole conversion.

        :param extIdsFileName: path to CSV file with one external id per
            line. The first line is skipped by the MySQL load.
        :type extIdsFileName: String
        '''

        user = '******'
        # Try to find pwd in the current user's $HOME/.ssh/mysql_root
        currUserHomeDir = os.getenv('HOME')
        if currUserHomeDir is None:
            pwd = None
        else:
            try:
                # Need to access MySQL db as its 'root':
                with open(os.path.join(currUserHomeDir, '.ssh/mysql_root')) as fd:
                    pwd = fd.readline().strip()
                # Switch user to 'root' b/c from now on it will need to be root:
                user = '******'

            except IOError:
                # No .ssh subdir of user's home, or no mysql_root inside .ssh:
                pwd = None

        self.db = MySQLDB(user=user, passwd=pwd, db='Misc')

        self.makeTmpExtsTable()
        self.loadExtIds(extIdsFileName)
        outfile = tempfile.NamedTemporaryFile(prefix='extsIntsScreenNames', suffix='.csv', delete=True)
        # Need to close this file, and thereby delete it,
        # so that MySQL is willing to write to it. Yes,
        # that's a race condition. But this is an
        # admin script, run by one person:
        outfile.close()
        self.findScreenNames(outfile.name)
        self.computeAnonFromScreenNames(outfile.name)

    def makeTmpExtsTable(self):
        '''
        Create a temporary MySQL table with a single column ext_id.
        The randomly generated table name is kept in self.externalsTblNm.
        '''
        # Create table to load the CSV file into:
        self.externalsTblNm = self.idGenerator(prefix='ExternalsTbl_')
        mysqlCmd = 'CREATE TEMPORARY TABLE %s (ext_id varchar(32));' % self.externalsTblNm
        self.db.execute(mysqlCmd)

    def loadExtIds(self, csvExtsFileName):
        '''
        Load the external ids in the given CSV file into the table
        created by makeTmpExtsTable(). The load goes through a scratch
        copy of the file with normalized line endings; that copy is
        removed again even if the load fails.

        :param csvExtsFileName: path to file with the external ids
        :type csvExtsFileName: String
        '''
        # Clean up line endings in the extIds file.
        # Between Win, MySQL, Mac, and R, we get
        # linefeeds and crs.
        # Text mode, because str lines are written below:
        cleanExtsFile = tempfile.NamedTemporaryFile(mode='w', prefix='cleanExts', suffix='.csv', delete=False)
        try:
            with cleanExtsFile:
                # Readable by everyone; presumably so that the MySQL
                # server process is able to read the file. The already
                # open file object stays writable:
                os.chmod(cleanExtsFile.name, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
                with open(csvExtsFileName, 'r') as rawExtsFd:
                    for line in rawExtsFd:
                        cleanExtsFile.write(line.strip() + '\n')

            mysqlCmd = ("LOAD DATA INFILE '%s' " % cleanExtsFile.name) +\
                       ('INTO TABLE %s ' % self.externalsTblNm) +\
                       "FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES;"
            self.db.execute(mysqlCmd)
        finally:
            # Delete the cleaned-exts file:
            os.remove(cleanExtsFile.name)

    def findScreenNames(self, outCSVFileName):
        '''
        Have MySQL write a CSV file with header line and columns
        ext_id, user_int_id, screen_name. The screen names are found
        by joining the loaded external ids with
        edxprod.student_anonymoususerid, and then with edxprod.auth_user.

        :param outCSVFileName: path of the file that MySQL is to create
        :type outCSVFileName: String
        '''
        sqlParts = [
            "SELECT 'ext_id','user_int_id','screen_name'",
            "UNION ",
            "SELECT ext_id,",
            "       user_int_id,",
            "       username ",
            "  INTO OUTFILE '%s'" % outCSVFileName,
            "  FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' LINES TERMINATED BY '\n'",
            "  FROM ",
            "    (SELECT ext_id,",
            "       user_id AS user_int_id ",
            "       FROM %s LEFT JOIN edxprod.student_anonymoususerid " % self.externalsTblNm,
            "           ON %s.ext_id = edxprod.student_anonymoususerid.anonymous_user_id " % self.externalsTblNm,
            "    ) AS ExtAndInts ",
            "    LEFT JOIN edxprod.auth_user ",
            "      ON edxprod.auth_user.id = ExtAndInts.user_int_id;",
        ]
        self.db.execute(''.join(sqlParts))

    def computeAnonFromScreenNames(self, extIntNameFileName):
        '''
        Read the CSV file written by findScreenNames(), and print one
        line 'ext_id,anon_screen_name' per data row, preceded by a
        header line. Rows whose screen name is MySQL's NULL marker (\\N)
        get the string NULL in place of a hash.

        :param extIntNameFileName: path to the CSV file to read
        :type extIntNameFileName: String
        '''
        with open(extIntNameFileName, 'r') as inFd:
            print('ext_id,anon_screen_name')
            # Discard the column header line:
            next(inFd, None)
            for line in inFd:
                # Split at the first two commas only, so that a quoted
                # screen name containing a comma stays in one piece:
                (extId, intId, screenName) = line.split(',', 2) #@UnusedVariable
                screenName = screenName.strip().strip('"')
                if screenName == '\\N':
                    print ('%s,%s' % (extId.strip('"'),'NULL'))
                else:
                    print('%s,%s' % (extId.strip('"'),EdXTrackLogJSONParser.makeHash(screenName)))

    def idGenerator(self, prefix='', size=6, chars=string.ascii_uppercase + string.digits):
        '''
        Return prefix followed by size characters, each picked at
        random from chars.

        :param prefix: string to put in front of the random part
        :type prefix: String
        :param size: number of random characters
        :type size: int
        :param chars: characters to choose from
        :type chars: String
        :return: the generated identifier
        :rtype: String
        '''
        randPart = ''.join(random.choice(chars) for _ in range(size))
        return prefix + randPart
class UserCountryTableCreator(object):
    '''
    Builds MySQL table UserCountry in db 'Edx': one row for each
    distinct (anon_screen_name, ip_country) pair found in table
    EventXtract, with the country as 2-letter code, 3-letter code,
    and full name.

    Usage: instantiate, then call fillTable(), makeIndex(), and close().
    '''

    DEST_TABLE = 'UserCountry'
    # Number of anon ids-country-2-letter-3-letter
    # tuples to accumulate before inserting into
    # UserCountry:
    INSERT_BULK_SIZE = 15000

    def __init__(self, user, pwd):
        '''
        Connect to MySQL db 'Edx', and replace any existing
        UserCountry table with a new, empty one.

        :param user: MySQL user name
        :type user: String
        :param pwd: MySQL password of that user
        :type pwd: String
        '''
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        # Start over with an empty table: any existing UserCountry
        # table is dropped, and then created anew:
        createCmd = '''CREATE TABLE UserCountry (
                         anon_screen_name varchar(40) NOT NULL DEFAULT "",
                         two_letter_country varchar(2) NOT NULL DEFAULT "",
                         three_letter_country varchar(3) NOT NULL DEFAULT "",
                         country varchar(255) NOT NULL DEFAULT ""
                         ) ENGINE=MyISAM;
                         '''
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        print("Creating table UserCountry...")
        self.db.execute(createCmd)
        print("Done creating table UserCountry.")

    def fillTable(self):
        '''
        Look up the country for every distinct
        (anon_screen_name, ip_country) pair in EventXtract, and insert
        the results into UserCountry in batches of INSERT_BULK_SIZE rows.
        Pairs whose country code cannot be looked up are entered with
        codes 'XX'/'XXX'.

        Exits the process with status 1 if a bulk insert reports errors.
        '''
        query = "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"
        query_res_it = self.db.query(query)
        done = False
        # Order of columns for insert:
        colNameTuple = ('anon_screen_name', 'two_letter_country',
                        'three_letter_country', 'country')

        while not done:
            values = []
            print("%s: Starting one set of %s lookups..." %\
                  (str(datetime.datetime.today()),
                   UserCountryTableCreator.INSERT_BULK_SIZE))
            for _ in range(UserCountryTableCreator.INSERT_BULK_SIZE):
                try:
                    # Builtin next() works for Python 2 and 3 iterators alike:
                    (anon_screen_name, ip3LetterCountry) = next(query_res_it)
                except StopIteration:
                    done = True
                    break
                # Try translating:
                try:
                    (twoLetterCode, threeLetterCode,
                     country) = self.ipCountryXlater.getBy3LetterCode(
                         ip3LetterCountry)
                except (ValueError, TypeError, KeyError):
                    twoLetterCode = 'XX'
                    threeLetterCode = 'XXX'
                    country = 'Not in lookup tbl'
                values.append(
                    tuple([
                        '%s' % anon_screen_name,
                        '%s' % twoLetterCode,
                        '%s' % threeLetterCode,
                        '%s' % country
                    ]))

            if not values:
                # Query result was empty, or its size was an exact
                # multiple of the batch size: there is nothing left
                # to insert, so don't issue an empty bulk insert.
                break

            # Insert this chunk into the UserCountry table
            print("%s: Inserting %s rows into UserCountry table..." %
                  (str(datetime.datetime.today()), len(values)))
            (insertErrors,
             insertWarnings) = self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE,
                                                  colNameTuple, values)
            if insertErrors is not None:
                print('Error(s) during UserCountry insert: %s' % insertErrors)
                sys.exit(1)
            if insertWarnings is not None:
                print('Warning(s) during UserCountry insert: %s' % insertWarnings)

            print("%s: Done inserting %s rows into UserCountry table..." %
                  (str(datetime.datetime.today()), len(values)))
            # ... and loop to process the next INSERT_BULK_SIZE batch

    def makeIndex(self):
        '''
        Create indexes on columns anon_screen_name and
        three_letter_country of UserCountry by calling the MySQL
        stored procedure createIndexIfNotExists.
        '''
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);"
        )
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);"
        )

    def close(self):
        '''
        Close the MySQL connection.
        '''
        self.db.close()
class ExportClassTest(unittest.TestCase):

    # Test data for one student in one class. Student is active in 2 of the
    # class' weeks:
    #
    # Week 4:
    # Session1: 15    total each week: Week4: 20
    # Session2:  5      	         Week6: 72
    #
    # Week 6:
    # Session3: 15
    # Session4: 42
    # Session5: 15
    # ------------
    #           92
    #
    # Sessions in weeks:
    # week4: [20]        ==> median = 20
    # week6: [15,42,15]  ==> median = 15
    #
    # The engagement summary file for one class:
    # totalStudentSessions, totalEffortAllStudents, oneToTwentyMin, twentyoneToSixtyMin, greaterSixtyMin
    #         5	                     92			        2                 0                  0
    #
    # The all_data detail file resulting from the data:
    # Platform,Course,Student,Date,Time,SessionLength
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42
    #    'OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15
    #
    # The weekly effort file from the data:
    # platform,course,student,week,effortMinutes
    #    'OpenEdX,CME/MedStats/2013-2015,abc,4,20
    #    'OpenEdX,CME/MedStats/2013-2015,abc,6,72

    oneStudentTestData = [
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 03:27:00", 0),  # week 4; start session
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-08-30 03:27:20", 1),  # 20sec (gets rounded to 0min)
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-08-30 03:37:00", 0),  # 9min:40sec (gets rounded to 10min)
        # 0min + 10min + 5min = 15min
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 04:10:00", 0),  # 5min
        (
            "CME/MedStats/2013-2015",
            "abc",
            "load_video",
            "2013-09-14 03:27:24",
            1,
        ),  # week 6; 15min (for the single video)
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 03:27:25", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-09-15 03:30:35", 0),  # 3min
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-15 03:59:00", 1),  # 28min
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:05:00", 0),  #  6min
        # 3min + 28min + 6min + 5min = 42
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:36:54", 1),  # 15
    ]

    courseRuntimesData = [
        ("CME/MedStats/2013-2015", "2013-07-30 03:27:00", "2013-10-30 03:27:00"),
        ("My/RealCourse/2013-2015", "2013-09-01 03:27:00", "2013-10-30 03:27:00"),
    ]

    userGradeData = [
        (10, "CME/MedStats/2013-2015", "abc"),
        (20, "My/RealCourse/Summer2014", "def"),
        (30, "CME/MedStats/2013-2015", "def"),
    ]

    demographicsData = [("abc", "f", 1988, "hs", "USA", "United States"), ("def", "m", 1990, "p", "FRG", "Germany")]

    true_courseenrollmentData = [
        (10, "CME/MedStats/2013-2015", "2013-08-30 03:27:00", "nomode"),
        (30, "CME/MedStats/2013-2015", "2014-08-30 03:27:00", "yesmode"),
    ]
    courseInfoData = [
        ("CME/MedStats/2013-2015", "medStats", 2014, "fall", 1, 0, "2014-08-01", "2014-09-01", "2014-11-31")
    ]

    userCountryData = [("US", "USA", "abc", "United States"), ("DE", "DEU", "def", "Germany")]

    twoStudentsOneClassTestData = [
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 03:27:00", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-08-30 03:27:20", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-08-30 03:37:00", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 04:10:00", 0),
        ("CME/MedStats/2013-2015", "def", "page_close", "2013-08-30 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-14 03:27:24", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 03:27:25", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-09-15 03:30:35", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-15 03:59:00", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:05:00", 0),
        ("CME/MedStats/2013-2015", "def", "page_close", "2013-09-16 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:36:54", 1),
    ]

    twoStudentsTwoClassesTestData = [
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 03:27:00", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-08-30 03:27:20", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-08-30 03:37:00", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-08-30 04:10:00", 0),
        ("My/RealCourse/2013-2015", "def", "page_close", "2013-09-01 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-14 03:27:24", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 03:27:25", 0),
        ("CME/MedStats/2013-2015", "abc", "page_close", "2013-09-15 03:30:35", 0),
        ("CME/MedStats/2013-2015", "abc", "load_video", "2013-09-15 03:59:00", 1),
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:05:00", 0),
        ("My/RealCourse/2013-2015", "def", "page_close", "2013-09-16 04:10:00", 1),  # Second student
        ("CME/MedStats/2013-2015", "abc", "seq_goto", "2013-09-15 04:36:54", 1),
    ]

    def setUp(self):
        """
        Create the course server under test in testing mode, and open a
        connection to the local MySQL database 'unittest'. The test is
        failed with an explanatory hint if the connection attempt raises
        ValueError.
        """
        # The server is given neither an application nor a request object:
        self.courseServer = CourseCSVServer(None, None, testing=True)

        setupHint = " (For unit testing, localhost MySQL server must have user 'unittest' without password, and a database called 'unittest')"
        try:
            self.mysqldb = MySQLDB(host="localhost", port=3306, user="******", db="unittest")
        except ValueError as connErr:
            self.fail(str(connErr) + setupHint)

    def tearDown(self):
        """
        Best-effort cleanup after each test: drop table
        unittest.Activities, and close the MySQL connection.

        Failures during cleanup are ignored on purpose, so that they
        cannot turn a passing test into a failing one. The connection
        is closed even if dropping the table fails.
        """
        db = getattr(self, "mysqldb", None)
        if db is None:
            # setUp() did not get as far as opening a connection.
            return
        try:
            db.dropTable("unittest.Activities")
        except Exception:
            pass
        finally:
            try:
                db.close()
            except Exception:
                pass

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testOneStudentOneClass(self):
        """
        Request engagement data for course CME/MedStats/2013-2015 with
        the one-student support tables in place. Then compare the lines
        following the header line in the summary, detail, and weekly
        effort result files against the expected values, and finally
        remove the three files.
        """
        self.buildSupportTables(TestSet.ONE_STUDENT_ONE_CLASS)
        self.courseServer.on_message(
            '{"req" : "getData", "args" : {"courseId" : "CME/MedStats/2013-2015", "engagementData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        )
        server = self.courseServer

        # Each entry: (result file, lines expected right after its header line)
        expectations = (
            (
                server.latestResultSummaryFilename,
                ("OpenEdX,CME/MedStats/2013-2015,5,92,2,0,0\n",),
            ),
            (
                server.latestResultDetailFilename,
                (
                    "OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15\n",
                    "OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5\n",
                    "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15\n",
                    "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42\n",
                    "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15\n",
                ),
            ),
            (
                server.latestResultWeeklyEffortFilename,
                (
                    "OpenEdX,CME/MedStats/2013-2015,abc,5,20\n",
                    "OpenEdX,CME/MedStats/2013-2015,abc,7,72\n",
                ),
            ),
        )

        for resultPath, wantedLines in expectations:
            with open(resultPath, "r") as resultFd:
                # The csv file's header line is of no interest:
                resultFd.readline()
                for wanted in wantedLines:
                    self.assertEqual(wanted, resultFd.readline())

        # Only reached if all comparisons succeeded:
        for resultPath, _ in expectations:
            os.remove(resultPath)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testTwoStudentsOneClass(self):
        """Export engagement data for the two-students/one-class fixture and check the CSV results.

        Sends a getData request with engagementData for course
        CME/MedStats/2013-2015, then compares the lines following the header
        of the summary, detail, and weekly-effort result files against the
        expected values. The result files are removed afterwards, also when
        an assertion fails.
        """
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "CME/MedStats/2013-2015", "engagementData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        resultFiles = (
            self.courseServer.latestResultSummaryFilename,
            self.courseServer.latestResultDetailFilename,
            self.courseServer.latestResultWeeklyEffortFilename,
        )
        summaryFile, detailFile, weeklyFile = resultFiles

        expectedDetailLines = [
            "OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15\n",
            "OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5\n",
            "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15\n",
            "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42\n",
            "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15\n",
            "OpenEdX,CME/MedStats/2013-2015,def,2013-08-30,04:10:00,15\n",
            "OpenEdX,CME/MedStats/2013-2015,def,2013-09-16,04:10:00,15\n",
        ]
        expectedWeeklyLines = [
            "OpenEdX,CME/MedStats/2013-2015,abc,5,20\n",
            "OpenEdX,CME/MedStats/2013-2015,abc,7,72\n",
            "OpenEdX,CME/MedStats/2013-2015,def,5,15\n",
            "OpenEdX,CME/MedStats/2013-2015,def,7,15\n",
        ]

        try:
            with open(summaryFile, "r") as fd:
                # Read and discard the csv file's header line:
                fd.readline()
                self.assertEqual("OpenEdX,CME/MedStats/2013-2015,7,122,4,0,0\n", fd.readline())

            with open(detailFile, "r") as fd:
                # Read and discard the csv file's header line:
                fd.readline()
                for expectedLine in expectedDetailLines:
                    self.assertEqual(expectedLine, fd.readline())

            with open(weeklyFile, "r") as fd:
                # Read and discard the csv file's header line:
                fd.readline()
                for expectedLine in expectedWeeklyLines:
                    self.assertEqual(expectedLine, fd.readline())
        finally:
            # Remove the result files even when an assertion above failed,
            # so a failing run does not leave export files behind:
            for resultFile in resultFiles:
                if resultFile and os.path.exists(resultFile):
                    os.remove(resultFile)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testTwoStudentsTwoClasses(self):
        """Export engagement data for the two-students/two-classes fixture and check the CSV results.

        Sends a getData request with engagementData and courseId "None".
        The non-header lines of each result file are sorted before being
        compared, so that each line is matched against its ground truth
        regardless of the order in which the export wrote them. The result
        files are removed afterwards, also when an assertion fails.
        """
        self.buildSupportTables(TestSet.TWO_STUDENTS_TWO_CLASSES)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "None", "engagementData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        resultFiles = (
            self.courseServer.latestResultSummaryFilename,
            self.courseServer.latestResultDetailFilename,
            self.courseServer.latestResultWeeklyEffortFilename,
        )

        # Expected leading lines (after sorting) for the summary, detail,
        # and weekly-effort files, in the same order as resultFiles:
        expectedSortedLines = (
            [
                "OpenEdX,CME/MedStats/2013-2015,5,92,2,0,0\n",
                "OpenEdX,My/RealCourse/2013-2015,2,30,2,0,0\n",
            ],
            [
                "OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,03:27:00,15\n",
                "OpenEdX,CME/MedStats/2013-2015,abc,2013-08-30,04:10:00,5\n",
                "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-14,03:27:24,15\n",
                "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,03:27:25,42\n",
                "OpenEdX,CME/MedStats/2013-2015,abc,2013-09-15,04:36:54,15\n",
                "OpenEdX,My/RealCourse/2013-2015,def,2013-09-01,04:10:00,15\n",
                "OpenEdX,My/RealCourse/2013-2015,def,2013-09-16,04:10:00,15\n",
            ],
            [
                "OpenEdX,CME/MedStats/2013-2015,abc,5,20\n",
                "OpenEdX,CME/MedStats/2013-2015,abc,7,72\n",
                "OpenEdX,My/RealCourse/2013-2015,def,1,15\n",
                "OpenEdX,My/RealCourse/2013-2015,def,3,15\n",
            ],
        )

        try:
            for resultFile, expectedLines in zip(resultFiles, expectedSortedLines):
                with open(resultFile, "r") as fd:
                    # Read and discard the csv file's header line:
                    fd.readline()
                    actualLines = fd.readlines()
                actualLines.sort()
                for lineNum, expectedLine in enumerate(expectedLines):
                    self.assertEqual(expectedLine, actualLines[lineNum])
        finally:
            # Remove the result files even when an assertion above failed,
            # so a failing run does not leave export files behind:
            for resultFile in resultFiles:
                if resultFile and os.path.exists(resultFile):
                    os.remove(resultFile)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testForumIsolated(self):
        """Export forum data with "relatable" set to "False" and check the zipped CSV.

        Opens the password-protected member MITx_6.002x_2012_Fall_Forum.csv
        of the zip file named by courseServer.latestForumFilename and compares
        its header and first two rows against the expected text. The zip
        archive and the member handle are closed even when an assertion fails.
        """
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "MITx/6.002x/2012_Fall", "forumData" : "True", "wipeExisting" : "True", "relatable" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)

        forumExportHeader = (
            "'forum_post_id','anon_screen_name','type','anonymous',"
            + "'anonymous_to_peers','at_position_list','forum_int_id','body',"
            + "'course_display_name','created_at','votes','count','down_count',"
            + "'up_count','up','down','comment_thread_id','parent_id','parent_ids',"
            + "'sk','confusion','happiness'\n"
        )
        forum1stLine = "\"519461545924670200000001\",\"<anon_screen_name_redacted>\",\"CommentThread\",\"False\",\"False\",\"[]\",11,\"First forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': -6, 'down_count': 8, 'up': ['2', '10'], 'down': ['1', '3', '4', '5', '6', '7', '8', '9'], 'up_count': 2}\",10,8,2,\"['2', '10']\",\"['1', '3', '4', '5', '6', '7', '8', '9']\",\"None\",\"None\",\"None\",\"None\",\"none\",\"none\""
        forum2ndLine = "\"519461545924670200000005\",\"<anon_screen_name_redacted>\",\"Comment\",\"False\",\"False\",\"[]\",7,\"Second forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': 4, 'down_count': 3, 'up': ['1', '2', '5', '6', '7', '8', '9'], 'down': ['3', '4', '10'], 'up_count': 7}\",10,3,7,\"['1', '2', '5', '6', '7', '8', '9']\",\"['3', '4', '10']\",\"519461545924670200000001\",\"None\",\"[]\",\"519461545924670200000005\",\"none\",\"none\""

        zipObj = zipfile.ZipFile(self.courseServer.latestForumFilename, "r")
        try:
            # Third argument is the archive password:
            forumFd = zipObj.open("MITx_6.002x_2012_Fall_Forum.csv", "r", "foobar")
            try:
                header = forumFd.readline()
                self.assertEqual(forumExportHeader, header)

                self.assertEqual(forum1stLine, forumFd.readline().strip())
                self.assertEqual(forum2ndLine, forumFd.readline().strip())
            finally:
                forumFd.close()
        finally:
            zipObj.close()

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testForumRelatable(self):
        """Export forum data with "relatable" set to "True" and check the zipped CSV.

        Opens the password-protected member MITx_6.002x_2012_Fall_Forum.csv
        of the zip file named by courseServer.latestForumFilename and compares
        its header and first two rows against the expected text; the expected
        rows carry a hash in the anon_screen_name column. The zip archive and
        the member handle are closed even when an assertion fails.
        """
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "MITx/6.002x/2012_Fall", "forumData" : "True", "wipeExisting" : "True", "relatable" : "True", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)

        forumExportHeader = (
            "'forum_post_id','anon_screen_name','type','anonymous',"
            + "'anonymous_to_peers','at_position_list','forum_int_id','body',"
            + "'course_display_name','created_at','votes','count','down_count',"
            + "'up_count','up','down','comment_thread_id','parent_id','parent_ids',"
            + "'sk','confusion','happiness'\n"
        )
        forum1stLine = "\"519461545924670200000001\",\"e07a3da71f0330452a6aa650ed598e2911301491\",\"CommentThread\",\"False\",\"False\",\"[]\",0,\"First forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': -6, 'down_count': 8, 'up': ['2', '10'], 'down': ['1', '3', '4', '5', '6', '7', '8', '9'], 'up_count': 2}\",10,8,2,\"['2', '10']\",\"['1', '3', '4', '5', '6', '7', '8', '9']\",\"None\",\"None\",\"None\",\"None\",\"none\",\"none\""
        forum2ndLine = "\"519461545924670200000005\",\"e07a3da71f0330452a6aa650ed598e2911301491\",\"Comment\",\"False\",\"False\",\"[]\",0,\"Second forum entry.\",\"MITx/6.002x/2012_Fall\",\"2013-05-16 04:32:20\",\"{'count': 10, 'point': 4, 'down_count': 3, 'up': ['1', '2', '5', '6', '7', '8', '9'], 'down': ['3', '4', '10'], 'up_count': 7}\",10,3,7,\"['1', '2', '5', '6', '7', '8', '9']\",\"['3', '4', '10']\",\"519461545924670200000001\",\"None\",\"[]\",\"519461545924670200000005\",\"none\",\"none\""

        zipObj = zipfile.ZipFile(self.courseServer.latestForumFilename, "r")
        try:
            # Third argument is the archive password:
            forumFd = zipObj.open("MITx_6.002x_2012_Fall_Forum.csv", "r", "foobar")
            try:
                header = forumFd.readline()
                self.assertEqual(forumExportHeader, header)

                self.assertEqual(forum1stLine, forumFd.readline().strip())
                self.assertEqual(forum2ndLine, forumFd.readline().strip())
            finally:
                forumFd.close()
        finally:
            zipObj.close()

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testForumIsolatedCourseNotInForum(self):
        """Request a forum export for course id "Course/Not/Exists".

        The request is sent to the server and the existence of the forum
        result file is then probed.
        """
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        exportRequest = '{"req" : "getData", "args" : {"courseId" : "Course/Not/Exists", "forumData" : "True", "wipeExisting" : "True", "inclPII" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(exportRequest)
        forumFile = self.courseServer.latestForumFilename
        # NOTE(review): the result of this check is discarded, so the test
        # only verifies that the calls above do not raise. Confirm whether
        # the file is expected to exist and turn this into an assertion.
        os.path.exists(forumFile)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testDemographics(self):
        """Export demographics and check the first two sorted rows of the result file.

        Sends a getData request with "demographics" set, waits three seconds
        for the result, then sorts the non-header lines of the file named by
        courseServer.latestDemographicsFilename and compares the first two
        against the expected rows. The result file is removed afterwards,
        also when an assertion fails.
        """
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "testtest/MedStats/2013-2015", "demographics" : "True", "wipeExisting" : "True", "relatable" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        # Allow result to be computed:
        time.sleep(3)
        demographicsFile = self.courseServer.latestDemographicsFilename
        try:
            with open(demographicsFile, "r") as fd:
                # Read and discard the csv file's header line:
                fd.readline()
                allDemographicsLines = fd.readlines()
            # Sort the lines to ensure that each line is compared
            # to its own ground truth:
            allDemographicsLines.sort()
            self.assertEqual('"abc","f","1988","hs","USA","United States"', allDemographicsLines[0].strip())
            self.assertEqual('"def","m","1990","p","FRG","Germany"', allDemographicsLines[1].strip())
        finally:
            # Remove the result file even when an assertion above failed:
            if demographicsFile and os.path.exists(demographicsFile):
                os.remove(demographicsFile)

    # ******@unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testQuarterlyDemographics(self):
        """Request the quarterly demographics report and check its first data row.

        Sends a getData request with "quarterRep" and
        "quarterRepDemographics" set for fall 2014, waits three seconds for
        the result, then compares the first non-header line of the file named
        by courseServer.latestQuarterlyDemographicsFilename (in file order,
        without sorting) against the expected row. The quarterly result file
        is removed afterwards, also when the assertion fails.
        """
        self.buildSupportTables(TestSet.TWO_STUDENTS_ONE_CLASS)
        jsonMsg = '{"req" : "getData", "args" : {"courseId" : "testtest/MedStats/2013-2015", "quarterRep": "True", "quarterRepDemographics" : "True", "quarterRepQuarter" : "fall", "quarterRepYear": "2014", "wipeExisting" : "True", "relatable" : "False", "cryptoPwd" : "foobar"}}'
        self.courseServer.on_message(jsonMsg)
        # Allow result to be computed:
        time.sleep(3)
        quarterlyFile = self.courseServer.latestQuarterlyDemographicsFilename
        try:
            with open(quarterlyFile, "r") as fd:
                # Read and discard the csv file's header line:
                fd.readline()
                allDemographicsLines = fd.readlines()
            self.assertEqual(
                "openedx,CME/MedStats/2013-2015,1,1,0,0,2,1,0,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0",
                allDemographicsLines[0].strip(),
            )
        finally:
            # Remove the file this test actually read. The plain demographics
            # file is removed as well when the server happens to have produced
            # one, which keeps the cleanup a superset of the earlier behavior.
            leftoverFiles = (
                quarterlyFile,
                getattr(self.courseServer, "latestDemographicsFilename", None),
            )
            for leftoverFile in leftoverFiles:
                if leftoverFile and os.path.exists(leftoverFile):
                    os.remove(leftoverFile)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testZipFiles(self):
        """Zip two temporary files with a password and extract the archive again.

        Two named temporary files are written, handed to
        courseServer.zipFiles() together with the password "foobar", and the
        resulting archive /tmp/zipFileUnittest.zip is read back with
        extractall(). Extraction goes into a scratch directory rather than the
        current working directory, and the temp files, the scratch directory,
        and the archive are all cleaned up afterwards.
        """
        import shutil

        zipPath = "/tmp/zipFileUnittest.zip"
        extractDir = tempfile.mkdtemp()
        file1 = tempfile.NamedTemporaryFile()
        file2 = tempfile.NamedTemporaryFile()
        try:
            # NamedTemporaryFile defaults to binary mode, hence byte strings:
            file1.write(b"foo")
            file2.write(b"bar")
            # Make sure the content is on disk before the files are zipped:
            file1.flush()
            file2.flush()
            self.courseServer.zipFiles(zipPath, "foobar", [file1.name, file2.name])
            # Read it all back:
            zipObj = zipfile.ZipFile(zipPath)
            try:
                zipObj.extractall(path=extractDir, pwd=b"foobar")
            finally:
                zipObj.close()
        finally:
            # Closing a NamedTemporaryFile also deletes it:
            file1.close()
            file2.close()
            shutil.rmtree(extractDir, ignore_errors=True)
            if os.path.exists(zipPath):
                os.remove(zipPath)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testExportPIIDetails(self):
        # Placeholder: this test has no body yet and asserts nothing.
        pass

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testLearnerPerformance(self):
        # Placeholder: this test has no body yet and asserts nothing.
        pass

    def buildSupportTables(self, testSetToLoad):
        """Recreate and fill the MySQL tables the export tests read.

        Drops, recreates, and bulk-loads Activities, CourseRuntimes,
        UserGrade, true_courseenrollment, UserCountry, Demographics, and
        CourseInfo in database 'unittest', then pipes data/forumTests.sql
        into the mysql command line client to load the forum table.

        :param testSetToLoad: TestSet member selecting which rows are
            loaded into the Activities table
        :raises ValueError: if testSetToLoad is not one of the known test
            sets; raised before any table is touched
        """
        # Pick the Activities rows first, so that an unknown test set is
        # rejected before any table has been dropped:
        if testSetToLoad == TestSet.ONE_STUDENT_ONE_CLASS:
            activityRows = ExportClassTest.oneStudentTestData
        elif testSetToLoad == TestSet.TWO_STUDENTS_ONE_CLASS:
            activityRows = ExportClassTest.twoStudentsOneClassTestData
        elif testSetToLoad == TestSet.TWO_STUDENTS_TWO_CLASSES:
            activityRows = ExportClassTest.twoStudentsTwoClassesTestData
        else:
            raise ValueError("Requested test set unavailable: %s" % testSetToLoad)

        # Activities table:
        self._rebuildTable(
            "unittest.Activities",
            [
                ("course_display_name", "varchar(255)"),
                ("anon_screen_name", "varchar(40)"),
                ("event_type", "varchar(120)"),
                ("time", "datetime"),
                ("isVideo", "TINYINT"),
            ],
            activityRows,
            insertTableName="Activities",
        )

        # Course runtimes:
        self._rebuildTable(
            "unittest.CourseRuntimes",
            [
                ("course_display_name", "varchar(255)"),
                ("course_start_date", "datetime"),
                ("course_end_date", "datetime"),
            ],
            ExportClassTest.courseRuntimesData,
            insertTableName="CourseRuntimes",
        )

        # UserGrade:
        self._rebuildTable(
            "unittest.UserGrade",
            [
                ("user_int_id", "int"),
                ("course_id", "varchar(255)"),
                ("anon_screen_name", "varchar(40)"),
            ],
            ExportClassTest.userGradeData,
            insertTableName="UserGrade",
        )

        # true_courseenrollment
        self._rebuildTable(
            "unittest.true_courseenrollment",
            [
                ("user_id", "int"),
                ("course_display_name", "varchar(255)"),
                ("created", "datetime"),
                ("mode", "varchar(10)"),
            ],
            ExportClassTest.true_courseenrollmentData,
        )

        # UserCountry:
        self._rebuildTable(
            "unittest.UserCountry",
            [
                ("two_letter_country", "varchar(2)"),
                ("three_letter_country", "varchar(3)"),
                ("anon_screen_name", "varchar(40)"),
                ("country", "varchar(255)"),
            ],
            ExportClassTest.userCountryData,
        )

        # Demographics; a view of that name is dropped as well before
        # the table is created:
        self._rebuildTable(
            "unittest.Demographics",
            [
                ("anon_screen_name", "varchar(40)"),
                ("gender", "varchar(255)"),
                ("year_of_birth", "int(11)"),
                ("level_of_education", "varchar(42)"),
                ("country_three_letters", "varchar(3)"),
                ("country_name", "varchar(255)"),
            ],
            ExportClassTest.demographicsData,
            alsoDropView=True,
        )

        # Quarterly Report Demographics:
        # CourseInfo:
        self._rebuildTable(
            "unittest.CourseInfo",
            [
                ("course_display_name", "varchar(255)"),
                ("course_catalog_name", "varchar(255)"),
                ("academic_year", "int"),
                ("quarter", "varchar(7)"),
                ("num_quarters", "int"),
                ("is_internal", "tinyint"),
                ("enrollment_start", "datetime"),
                ("start_date", "datetime"),
                ("end_date", "datetime"),
            ],
            ExportClassTest.courseInfoData,
        )

        # Forum table:
        # This table gets loaded via a .sql file imported into mysql.
        # That file drops any existing unittest.contents, so we
        # don't do that here.
        # NOTE(review): the path is relative to the current working
        # directory, and the mysql client's exit status is not checked.
        mysqlCmdFile = "data/forumTests.sql"
        mysqlLoadCmd = ["mysql", "-u", "unittest"]
        with open(mysqlCmdFile, "r") as theStdin:
            # Drop table unittest.contents, and load a fresh copy:
            subprocess.call(mysqlLoadCmd, stdin=theStdin)

    def _rebuildTable(self, tableName, columnSpecs, rows, insertTableName=None, alsoDropView=False):
        """Drop, recreate, and bulk-load one fixture table.

        :param tableName: name passed to dropTable() and createTable()
        :param columnSpecs: list of (column name, MySQL type) pairs; their
            order defines both the table schema and the insert column list
        :param rows: row tuples handed to bulkInsert()
        :param insertTableName: name passed to bulkInsert(); defaults to
            tableName
        :param alsoDropView: if True, additionally issue
            DROP VIEW IF EXISTS for tableName before creating the table
        """
        schema = OrderedDict(columnSpecs)
        self.mysqldb.dropTable(tableName)
        if alsoDropView:
            self.mysqldb.execute("DROP VIEW IF EXISTS %s" % tableName)
        self.mysqldb.createTable(tableName, schema)
        self.mysqldb.bulkInsert(insertTableName or tableName, list(schema.keys()), rows)
Esempio n. 45
0
class TestForumEtl(unittest.TestCase):

    # Forum rows have the following columns:
    #  type, anonymous, anonymous_to_peers, at_position_list, user_int_id, body, course_display_name, created_at, votes, count, down_count, up_count, up, down, comment_thread_id, parent_id, parent_ids, sk   

    # Correct result for relationization of tinyForum.json
    # (in <projDir>/src/forum_etl/data). This result is anonymized and not relatable,
    # i.e. poster name UIDs use integers, while other tables use hashes:
    tinyForumGoldAnonymized = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('anon_screen_name_redacted','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]
    
    # Gold result for anonymization that allows relating to other tables (i.e. hashes are constant)
    tinyForumGoldRelatable = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('abc','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('def','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('ghi','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_abc> <nameRedac_abc> <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]
    
    # Gold result for non-anonymized forum:
    tinyForumGoldClear = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('otto_king','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('fritzL','Comment', 'False', 'False', '[]', 7L, ' Body with [email protected] email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster name Otto embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('bebeW','Comment', 'False', 'False', '[]', 10L, 'Body with 650-333-4567 a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name otto_king embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name Otto van Homberg embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]    

    def setUp(self):
        
        self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum")
        # Fill the little MongoDB with test JSON lines
        self.resetMongoTestDb()
        
        self.mysqldb = MySQLDB(user='******', db='unittest')
        # Start with an empty result MySQL table for each test:
        self.mysqldb.dropTable('contents')
        # Fill the fake UserGrade table with records of course participants:
        self.resetMySQLUserListDb()
        
        # Instantiate a Forum scrubber without the 
        # name of a bson file that contains forum
        # records. That 'None' for the bson file will
        # make the class understand that it's being
        # instantiated for a unit test. 
        self.forumScrubberAnonymized = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade')
        self.forumScrubberRelatable  = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', allowAnonScreenName=True)
        self.forumScrubberClear      = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', anonymize=False)

    def tearDown(self):
        self.mysqldb.close()

    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testAnonymized(self):
        self.forumScrubberAnonymized.populateUserCache()
        self.forumScrubberAnonymized.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldAnonymized[rowNum], forumPost)
            
    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testNonAnonymizedRelatable(self):
        self.forumScrubberRelatable.populateUserCache()
        self.forumScrubberRelatable.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldRelatable[rowNum], forumPost)

    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testNonAnonymized(self):
        self.forumScrubberClear.populateUserCache()
        self.forumScrubberClear.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldClear[rowNum], forumPost)


    
    def resetMongoTestDb(self):
        self.mongoDb.clearCollection()
        # Use small, known forum collection:
        currDir = os.path.dirname(__file__)     
        with open(os.path.join(currDir, 'data/tinyForum.json'), 'r') as jsonFd:
            for line in jsonFd:
                forumPost = json.loads(line)
                self.mongoDb.insert(forumPost)

    def resetMySQLUserListDb(self):
        '''
        Prepare a MySQL table that mimicks EdxPrivate.UserGrade.
        '''
        
        userGradeColSpecs = OrderedDict(
                                        {
                                         'name' : 'varchar(255)',
                                         'screen_name' : 'varchar(255)',
                                         'grade' : 'int',
                                         'course_id' : 'varchar(255)',
                                         'distinction' : 'tinyint',
                                         'status' : 'varchar(50)',
                                         'user_int_id' : 'int(11)',
                                         'anon_screen_name' : 'varchar(40)'
                                         })
        self.mysqldb.dropTable('UserGrade')
        self.mysqldb.createTable('UserGrade', userGradeColSpecs)
        self.mysqldb.bulkInsert('UserGrade', 
                                ('name','screen_name','grade','course_id','distinction','status','user_int_id','anon_screen_name'),
                                [
                                 ('Otto van Homberg','otto_king',5,'oldCourse',0,'notpassing',5,'abc'),
                                 ('Andreas Fritz','fritzL',2,'newCourse',0,'notpassing',7,'def'),
                                 ('Bebe Winter', 'bebeW',10,'History of Baking',1,'passing',10,'ghi')
                                 ])
class CoursesGivenQuarter(object):
    '''
    Bus module that queries datastage for course information,
    given academic year, and quarter.
    
    Requests arrive as JSON messages on a bus topic. The matching
    rows of MySQL table CourseInfo are rendered as an HTML table
    and published on topic CoursesGivenQuarter.module_topic.
    '''
    
    # Port used for the MySQL connection to 127.0.0.1:
    MYSQL_PORT_LOCAL = 5555
    
    # Default topic to subscribe to; responses are always
    # published on this topic:
    module_topic   = 'course_listing'

    def __init__(self, topic=None, user='******', passwd=''):
        '''
        Open the MySQL connection, subscribe to the request topic,
        and then wait for bus messages in an endless loop. 
        This constructor therefore does not return normally.
        
        :param topic: topic to subscribe to. If None, module_topic is used.
        :type topic: {string | None}
        :param user: MySQL user name
        :type user: string
        :param passwd: MySQL password
        :type passwd: string
        '''
        if topic is None:
            topic = CoursesGivenQuarter.module_topic
            
        self.mysqldb = MySQLDB(host='127.0.0.1', 
                               port=CoursesGivenQuarter.MYSQL_PORT_LOCAL, 
                               user=user, 
                               passwd=passwd, 
                               db='Edx')
        
        # Wrap the callback method in a plain callable. The bound
        # method already carries 'self', so the resulting function
        # takes just topicName, msgText, and msgOffset:
        
        self.requestDeliveryMethod = functools.partial(self.requestCoursesForQuarter)        
        
        # Create a BusAdapter instance:
        
        self.bus = BusAdapter()

        # Tell the bus that we are interested in the request topic,
        # and want callbacks to self.requestDeliveryMethod whenever
        # a message arrives:
        
        self.bus.subscribeToTopic(topic, self.requestDeliveryMethod)
        
        # All work happens in the callback; here we only wait:
        
        while True:
            # NOTE(review): this waits on module_topic even when a 
            # different topic was passed in and subscribed to above.
            # Confirm against the BusAdapter API whether that is intended.
            self.bus.waitForMessage(CoursesGivenQuarter.module_topic)

    def requestCoursesForQuarter(self, topicName, msgText, msgOffset):
        '''
        Callback for incoming bus messages. Validates the request,
        runs the course query, and publishes the result as an HTML table.
        The msgText should have the JSON format:
        
            {'id'      : 'abcd',
             'type'    : 'req',
             'content' : {'academic_year' : '2014',
                          'quarter'       : 'spring'},
             'time'    : '2015-05-27T18:12:22.706204'
            }           
        
        Messages whose type is not 'req' are silently ignored.
        
        Response will be of the form:
            {'id'          : 'abcd',
             'status'      : 'OK'
             'content'     : <html table>
            }
            
        Or, in case of error:
            {'id'          : 'abcd',
             'status'      : 'ERROR'
             'content'     : '<error msg>'
            }
        
        NOTE(review): the envelope above is presumably assembled by 
        the bus; this method only hands publish() the content, the 
        message type 'resp', and the request id.
        
        :param topicName: name of topic to which the arriving msg belongs
        :type topicName: string
        :param msgText: text part of the message. JSON as specified above.
        :type msgText: string
        :param msgOffset: position of message in the topic's message history
        :type msgOffset: int
        '''
        try:
            # Import the message into a dict:
            msgDict = json.loads(msgText)
        except (ValueError, TypeError) as e:
            # No request id is available yet, so we can only log:
            self.bus.logError('Received msg with invalid wrapping JSON: %s (%s)' % (str(msgText), repr(e)))
            return

        # Must have a message id. TypeError covers JSON that 
        # is legal, but not an object (e.g. a list or a number):
        try:
            reqId = msgDict['id']
        except (KeyError, TypeError):
            self._rejectRequest('NULL', 
                                "Error: message id not provided in an incoming request.",
                                "Message id not provided in %s" % str(msgDict))
            return

        # Must have a message type:
        try:
            reqKey = msgDict['type']
        except KeyError:
            self._rejectRequest(reqId, 
                                "Error: message type not provided in %s" % str(msgDict),
                                'Received msg without a type field: %s' % str(msgText))
            return
        
        # Only requests are served; e.g. our own responses on 
        # the same topic are skipped:
        if reqKey != 'req':
            return
        
        # Must have a content field:
        try:
            contentDict = msgDict['content']
        except KeyError:
            self._rejectRequest(reqId, 
                                "Error: no content field provided in %s" % str(msgDict),
                                'Received msg without a content field: %s' % str(msgText))
            return
        
        # Must have an academic year. TypeError covers a 
        # content field that is not a JSON object:
        try:
            academicYear = contentDict['academic_year']
        except (KeyError, TypeError):
            self._rejectRequest(reqId, 
                                "Error: academic year not provided in %s" % str(msgDict),
                                'Received msg without academic year in content field: %s' % str(msgText))
            return
            
        # Must have a quarter:
        try:
            quarter = contentDict['quarter']
        except (KeyError, TypeError):
            self._rejectRequest(reqId, 
                                "Error: quarter not provided in %s" % str(msgDict),
                                'Received msg without quarter in content field: %s' % str(msgText))
            return
        
        # Get an array of tuples, each being one MySQL record:
        #    course_display_name,
        #    course_catalog_name,
        #    is_internal
        
        resultArr = self.executeCourseInfoQuery(academicYear, quarter, reqId)
        if resultArr is None:
            # Query failed; the error response was already published:
            return
        
        # Turn result into an HTML table:
        htmlRes = self.buildHtmlTableFromQueryResult(resultArr)

        # Note that we pass the message type 'resp' 
        # to publish(), and that we specify that the
        # msg ID is to be the same as the incoming request.

        self.bus.publish(htmlRes, 
                         CoursesGivenQuarter.module_topic,
                         msgType='resp',
                         msgId=reqId)
        
    def executeCourseInfoQuery(self, academicYear, quarter, reqId='NULL'):
        '''
        Query table CourseInfo for the courses of one quarter.
        
        :param academicYear: academic year to match against column academic_year
        :type academicYear: {string | int}
        :param quarter: quarter to match against column quarter
        :type quarter: string
        :param reqId: id of the request being served; used as the 
            message id of the error response if the query fails.
        :type reqId: string
        :return: list of (course_display_name, course_catalog_name, is_internal)
            rows, or None if the database call raised an error. In the 
            error case an error response has been published and logged.
        :rtype: {[tuple] | None}
        '''
        
        # Both values come straight from a bus message, so 
        # escape them before placing them in the query:
        courseQuery = ("SELECT course_display_name, course_catalog_name, is_internal "
                       "FROM CourseInfo "
                       "WHERE academic_year = '%s' AND quarter = '%s';" %
                       (self._escapeSqlLiteral(academicYear),
                        self._escapeSqlLiteral(quarter)))

        try:
            # Pull all rows inside the try, in case the
            # result iterator fetches lazily:
            return list(self.mysqldb.query(courseQuery))
        except Exception as e:
            self.returnError(reqId, "Error: Call to database returned an error: '%s'" % repr(e))
            self.bus.logError("Call to MySQL returned an error: '%s'" % repr(e))
            return None
    
    def returnError(self, req_id, errMsg):
        '''
        Publish an error message as the response to a request.
        
        :param req_id: id of the request that could not be served
        :type req_id: string
        :param errMsg: error text to publish
        :type errMsg: string
        '''
        self.bus.publish(errMsg, 
                         CoursesGivenQuarter.module_topic,
                         msgId=req_id, 
                         msgType='resp')

    def buildHtmlTableFromQueryResult(self, resTupleArr):
        '''
        Render query result rows as an HTML table with a header row.
        
        :param resTupleArr: rows of (course_display_name, course_catalog_name, is_internal)
        :type resTupleArr: [tuple]
        :return: HTML table; data rows are separated by a single space
        :rtype: string
        '''
        htmlStr   = '<table border=1><tr><td><b>Course</b></td><td><b>Description</b></td><td><b>Internal-Only</b></td></tr>'
        strResArr = ["<tr><td>%s</td><td>%s</td><td>%s</td></tr>" %
                     (courseDisplayName, courseCatalogName, isInternal)
                     for (courseDisplayName, courseCatalogName, isInternal) in resTupleArr]
        return htmlStr + ' '.join(strResArr) + '</table>'
            
    def close(self):
        '''
        Close the MySQL connection. Best effort: errors 
        raised by the close are ignored.
        '''
        try:
            self.mysqldb.close()
        except Exception:
            # Deliberately ignored; nothing useful to do on a failed close.
            pass

    def _rejectRequest(self, reqId, clientMsg, logMsg):
        '''
        Publish clientMsg as the error response for request reqId, 
        then write logMsg to the bus error log.
        '''
        self.returnError(reqId, clientMsg)
        self.bus.logError(logMsg)

    @staticmethod
    def _escapeSqlLiteral(value):
        '''
        Return value as text with backslashes and single quotes 
        backslash-escaped, for use inside a single-quoted SQL string.
        NOTE(review): assumes the server honors backslash escapes
        (i.e. NO_BACKSLASH_ESCAPES is off); a parameterized query 
        would be preferable if MySQLDB offers one.
        '''
        text = '%s' % (value,)
        return text.replace('\\', '\\\\').replace("'", "\\'")
class UserCountryTableCreator(object):
    '''
    Builds table UserCountry in MySQL database Edx. For each distinct
    (anon_screen_name, ip_country) pair found in table EventXtract, 
    the three-letter country code is translated into a two-letter 
    code, a three-letter code, and a country name.
    '''

    # Name of the table that is (re-)created and filled:
    DEST_TABLE = 'UserCountry'
    
    def __init__(self, user, pwd):
        '''
        Connect to database Edx, then drop and re-create the 
        destination table. Any existing UserCountry table is discarded.
        
        :param user: MySQL user name
        :type user: string
        :param pwd: MySQL password
        :type pwd: string
        '''
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd  = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        # Build the OrderedDict from a list of pairs. Passing a dict
        # literal would hand the columns over in the literal's own 
        # iteration order, which is arbitrary on Python versions 
        # whose plain dicts are unordered:
        colSpecs = OrderedDict([
                                ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
                                ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
                                ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
                                ('country', 'varchar(255) NOT NULL DEFAULT ""')
                                ])
        self.db.createTable(UserCountryTableCreator.DEST_TABLE, colSpecs)
        
    def fillTable(self):
        '''
        Fill the destination table with one row per distinct
        (anon_screen_name, ip_country) pair in EventXtract.
        Pairs whose country code cannot be looked up are reported 
        on stderr and skipped. All rows are inserted in one bulk insert.
        '''
        values = []
        for (user, ip3LetterCountry) in self.db.query("SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"):
            try:
                (twoLetterCode, threeLetterCode, country) = self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError,TypeError,KeyError) as e:
                sys.stderr.write("Could not look up one country from (%s/%s): %s\n" % (user, ip3LetterCountry, repr(e)))
                continue
            # Store every field as a string:
            values.append(tuple('%s' % field 
                                for field in (user, twoLetterCode, threeLetterCode, country)))
        
        colNameTuple = ('anon_screen_name','two_letter_country','three_letter_country','country')
        self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple, values)

    def makeIndex(self):
        '''
        Create indexes on columns anon_screen_name and 
        three_letter_country by calling the stored procedure
        createIndexIfNotExists.
        '''
        self.db.execute("CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);")
        self.db.execute("CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);")

    def close(self):
        '''
        Close the MySQL connection.
        '''
        self.db.close()