class UserCountryTableCreator(object):
    '''
    Creates and fills MySQL table UserCountry in database Edx.
    Each row pairs an anon_screen_name found in EventXtract with
    the two-letter code, three-letter code, and name of the country
    that the IpCountryDict lookup returns for the row's ip_country.
    '''

    DEST_TABLE = 'UserCountry'
    
    def __init__(self, user, pwd):
        '''
        Connect to database Edx, then drop and re-create an
        empty UserCountry table.

        @param user: MySQL user under which to log in
        @type user: String
        @param pwd: MySQL password for that user
        @type pwd: String
        '''
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd  = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        # The schema is handed over as a sequence of pairs: an
        # OrderedDict initialized from a dict literal inherits
        # whatever order the plain dict happens to have, which is
        # arbitrary on interpreters without ordered dicts.
        self.db.createTable(UserCountryTableCreator.DEST_TABLE, 
                            OrderedDict([
                                ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
                                ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
                                ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
                                ('country', 'varchar(255) NOT NULL DEFAULT ""'),
                            ]))
        
    def fillTable(self):
        '''
        Look up the country of every distinct (anon_screen_name, ip_country)
        pair in EventXtract, and bulk-insert the results into UserCountry.
        Pairs whose country cannot be looked up are reported on stderr
        and left out of the table.
        '''
        values = []
        for (user, ip3LetterCountry) in self.db.query("SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"):
            try:
                (twoLetterCode, threeLetterCode, country) = self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError,TypeError,KeyError) as e:
                # repr() yields the same text the Python 2-only
                # backtick syntax did, and is valid in Python 3 as well:
                sys.stderr.write("Could not look up one country from (%s/%s): %s\n" % (user, ip3LetterCountry, repr(e)))
                continue
            values.append(('%s' % user, '%s' % twoLetterCode, '%s' % threeLetterCode, '%s' % country))
        
        colNameTuple = ('anon_screen_name','two_letter_country','three_letter_country','country')
        self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple, values)

    def makeIndex(self):
        '''
        Index UserCountry on anon_screen_name and on three_letter_country
        via the createIndexIfNotExists stored procedure.
        '''
        self.db.execute("CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);")
        self.db.execute("CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);")

    def close(self):
        '''
        Close the database connection.
        '''
        self.db.close()
class UserCountryTableCreator(object):
    '''
    Builds table UserCountry in database Edx: one row per distinct
    (anon_screen_name, ip_country) pair of EventXtract, holding the
    anon_screen_name plus the two-letter code, three-letter code,
    and country name obtained from an IpCountryDict lookup.
    '''

    DEST_TABLE = 'UserCountry'

    def __init__(self, user, pwd):
        '''
        Log into database Edx; drop UserCountry, and create
        it anew with no rows.

        @param user: MySQL user under which to log in
        @type user: String
        @param pwd: MySQL password for that user
        @type pwd: String
        '''
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        # Column order matters for a table schema. Feeding a dict
        # literal to OrderedDict() only keeps that order when plain
        # dicts are themselves ordered, so pass explicit pairs:
        schema = OrderedDict([
            ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
            ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
            ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
            ('country', 'varchar(255) NOT NULL DEFAULT ""'),
        ])
        self.db.createTable(UserCountryTableCreator.DEST_TABLE, schema)

    def fillTable(self):
        '''
        Translate the ip_country of every distinct (anon_screen_name,
        ip_country) pair in EventXtract, and insert the outcome into
        UserCountry in a single bulk insert. A pair whose lookup fails
        is logged to stderr and skipped.
        '''
        values = []
        for (user, ip3LetterCountry) in self.db.query(
                "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"
        ):
            try:
                (twoLetterCode, threeLetterCode, country
                 ) = self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError, TypeError, KeyError) as e:
                # repr(e) replaces the backtick expression, which
                # is a syntax error in Python 3; the text is the same.
                sys.stderr.write(
                    "Could not look up one country from (%s/%s): %s\n" %
                    (user, ip3LetterCountry, repr(e)))
                continue
            values.append((
                '%s' % user,
                '%s' % twoLetterCode,
                '%s' % threeLetterCode,
                '%s' % country,
            ))

        colNameTuple = ('anon_screen_name', 'two_letter_country',
                        'three_letter_country', 'country')
        self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple,
                           values)

    def makeIndex(self):
        '''
        Create indexes on UserCountry.anon_screen_name and
        UserCountry.three_letter_country through stored procedure
        createIndexIfNotExists.
        '''
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);"
        )
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);"
        )

    def close(self):
        '''
        Close the database connection.
        '''
        self.db.close()
    def __init__(self, user, db, table, totalRows=None):
        '''
        Write one 'anon_screen_name,ip' line to /tmp/anonIps.csv for
        each distinct, non-NULL anon_screen_name in table EventIp,
        using the ip of the first row in which the name is seen.
        Rows are pulled in batches of UniqueAnonIpExtractor.BATCH_SIZE,
        and names already written are remembered in class-level dict
        UniqueAnonIpExtractor.seenAnons. Exits the process via
        sys.exit() if the row count query yields 0.

        @param user: MySQL user under which to log in. The password
            is read from file $HOME/.ssh/mysql
        @type user: String
        @param db: name of the MySQL database to connect to
        @type db: String
        @param table: not used by this method; rows are always
            read from EventIp
        @type table: String
        @param totalRows: maximum number of rows to process. If None,
            the result of the row count query is used.
        @type totalRows: {int | None}
        '''
        home = os.environ['HOME']
        with open(os.path.join(home, '.ssh/mysql')) as pwdFd:
            pwd = pwdFd.read().strip()

        db = MySQLDB(db=db, user=user, passwd=pwd)
        # The connection is local to this method, so release
        # it no matter how the method is left (including sys.exit()):
        try:
            # Number of rows pulled from EventIp:
            rowCount = 0
            batchSize = UniqueAnonIpExtractor.BATCH_SIZE
            seenAnons = UniqueAnonIpExtractor.seenAnons

            # First row to get in the select statement:
            nextBatchStartRow = -batchSize

            with open('/tmp/anonIps.csv', 'w') as fd:
                fd.write('anon_screen_name,ip\n')
                numRecords = db.query('SELECT count(*) from EventIp').next()
                if numRecords == 0:
                    sys.exit()
                if totalRows is None:
                    totalRows = numRecords
                while rowCount < numRecords and rowCount < totalRows:
                    nextBatchStartRow += batchSize
                    rowsInBatch = 0
                    for (anon_screen_name, ip)  in db.query('SELECT anon_screen_name, event_ip from EventIp LIMIT %s,%s' % \
                                                            (nextBatchStartRow, batchSize)):
                        rowsInBatch += 1
                        # The anon_screen_name in the db could actually be NULL,
                        # a.k.a. None. Ignore those, and names already written:
                        if anon_screen_name is not None and anon_screen_name not in seenAnons:
                            fd.write(anon_screen_name + ',' + ip + '\n')
                            seenAnons[anon_screen_name] = 1
                        rowCount += 1
                        if (rowCount % batchSize) == 0:
                            print("Did %s rows." % rowCount)
                        if rowCount >= totalRows:
                            break
                    if rowsInBatch == 0:
                        # The table yielded fewer rows than the count
                        # promised; without this exit rowCount would never
                        # advance again, and the while loop would spin forever.
                        break
                print('Finished %s rows; %s unique anon_screen_names' %
                      (rowCount, len(seenAnons)))
        finally:
            db.close()
Example #4
0
class AnonAndModIDAdder(object):

    # Number of rows to process in memory
    # before writing to ActivityGrade:
    BATCH_SIZE = 10000
    
    # For explanation of the following regex patterns,
    # see header comment of parseStateJSON:
    SOLUTION_RESULT_PATTERN  = re.compile(r'[^"]*correctness": "([^"]*)')
    SOLUTION_ANSWERS_PATTERN = re.compile(r'[^:]*: "([^"]*)"')
    
    ACTIVITY_GRADE_COL_NAMES = [
                'activity_grade_id',
                'student_id',
                'course_display_name',
                'grade',
                'max_grade',
                'percent_grade',
                'parts_correctness',
                'answers',
                'num_attempts',
                'first_submit',
                'last_submit',
                'module_type',
                'anon_screen_name',
                'resource_display_name',
                'module_id'
                ]
    
    # Indices into tuples from StudentmoduleExcerpt:
    STUDENT_INT_ID_INDEX = 1
    GRADE_INDEX = 3
    MAX_GRADE_INDEX = 4
    PERCENT_GRADE_INDEX = 5
    PARTS_CORRECTNESS_INDEX = 6
    ANSWERS_INDEX = 7
    NUM_ATTEMPTS_INDEX = 8
    ANON_SCREEN_NAME_INDEX = 12
    RESOURCE_DISPLAY_NAME_INDEX = 13
    MODULE_ID_INDEX = 14
    
    
    def __init__(self, uid, pwd, db='Edx', testing=False):
        '''
        Connect to MySQL, cache the mapping from integer student
        ids to anon_screen_names, and then immediately process
        all rows via pullRowByRow(). All work happens during
        construction.

        @param uid: MySQL user under which to log in. Assumed to be other than None
        @type uid: String
        @param pwd: MySQL password for user uid. May be None, in which
            case no password is passed to the MySQL wrapper.
        @type pwd: {String | None}
        @param db: name of the database to connect to. The value
            'unittest' makes pullRowByRow() read from the unittest database.
        @type db: String
        @param testing: passed through to cacheIdInt2Anon()
        @type testing: boolean
        '''
        self.db = db
        connectArgs = {'user': uid, 'db': db}
        if pwd is not None:
            connectArgs['passwd'] = pwd
        self.mysqldbStudModule = MySQLDB(**connectArgs)

        # Comma-separated column list for SELECT statements:
        # 'activity_grade_id,student_id,...':
        self.colSpec = ','.join(AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES)

        self.cacheIdInt2Anon(testing)
        self.pullRowByRow()

    def cacheIdInt2Anon(self, testing=False):
        '''
        Builds a dict to map platform integers to anon_screen_names. 
        
    :param testing: If set true, then all tables are assumed to be in MySQL DB unittest.
        :type testing: boolean
        '''
        self.int2AnonCache = {}
        if testing:
            queryIt = self.mysqldbStudModule.query("SELECT student_id AS user_int_id, \
                                                           unittest.UserGrade.anon_screen_name \
                                                      FROM unittest.StudentmoduleExcerpt LEFT JOIN unittest.UserGrade \
                                                        ON unittest.StudentmoduleExcerpt.student_id = unittest.UserGrade.user_int_id;")
        else:
            queryIt = self.mysqldbStudModule.query("SELECT student_id AS user_int_id, \
                                                           EdxPrivate.UserGrade.anon_screen_name \
                                                      FROM edxprod.StudentmoduleExcerpt LEFT JOIN EdxPrivate.UserGrade \
                                                        ON edxprod.StudentmoduleExcerpt.student_id = EdxPrivate.UserGrade.user_int_id;")
        for user_int_id, anon_screen_name in queryIt:
            self.int2AnonCache[user_int_id] = anon_screen_name;

    def pullRowByRow(self):
        rowBatch = []
        theQuery = "SELECT activity_grade_id,student_id,\
                    	   course_display_name,grade,max_grade,percent_grade,\
                    	   parts_correctness,answers,num_attempts,first_submit,\
                    	   last_submit,module_type,anon_screen_name,\
                    	   resource_display_name,module_id \
                    FROM edxprod.StudentmoduleExcerpt \
                    WHERE isTrueCourseName(course_display_name) = 1;"
        if self.db == 'unittest':
            queryIt = self.mysqldbStudModule.query("SELECT %s FROM unittest.StudentmoduleExcerpt;" % self.colSpec)
        else:
            #**********queryIt = self.mysqldbStudModule.query("SELECT %s FROM edxprod.StudentmoduleExcerpt;" % self.colSpec)
            queryIt = self.mysqldbStudModule.query(theQuery)
        for studmodTuple in queryIt:
            # Results return as tuples, but we need to change tuple items by index.
            # So must convert to list:
            studmodTuple = list(studmodTuple)
            # Resolve the module_id into a human readable resource_display_name:
            moduleID = studmodTuple[AnonAndModIDAdder.MODULE_ID_INDEX]
            studmodTuple[AnonAndModIDAdder.RESOURCE_DISPLAY_NAME_INDEX] = self.getResourceDisplayName(moduleID)
            
            # Compute the anon_screen_name:
            studentIntId = studmodTuple[AnonAndModIDAdder.STUDENT_INT_ID_INDEX]
            try:
                studmodTuple[AnonAndModIDAdder.ANON_SCREEN_NAME_INDEX] = self.int2AnonCache[studentIntId]
            except TypeError:
                studmodTuple[AnonAndModIDAdder.ANON_SCREEN_NAME_INDEX] = ''

            # Pick grade and max_grade out of the row,
            # compute the percentage, and place that 
            # back into the row in col 
            grade = studmodTuple[AnonAndModIDAdder.GRADE_INDEX]
            max_grade = studmodTuple[AnonAndModIDAdder.MAX_GRADE_INDEX]
            percent_grade = 'NULL'
            try:
                percent_grade = round((int(grade) * 100.0/ int(max_grade)), 2)
            except:
                pass
            studmodTuple[AnonAndModIDAdder.PERCENT_GRADE_INDEX] = str(percent_grade)

            # Parse 'state' column from JSON and put result into plusses/minusses column:
            (partsCorrectness, answers, numAttempts) = \
                self.parseStateJSON(studmodTuple[AnonAndModIDAdder.PARTS_CORRECTNESS_INDEX])
            
            studmodTuple[AnonAndModIDAdder.PARTS_CORRECTNESS_INDEX] = partsCorrectness
            studmodTuple[AnonAndModIDAdder.ANSWERS_INDEX] = ','.join(answers)
            studmodTuple[AnonAndModIDAdder.NUM_ATTEMPTS_INDEX] = numAttempts
            
            rowBatch.append(studmodTuple)
            if len(rowBatch) >= AnonAndModIDAdder.BATCH_SIZE:
                self.mysqldbStudModule.bulkInsert('ActivityGrade', AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES, rowBatch)
                rowBatch = []
        if len(rowBatch) > 0:
            self.mysqldbStudModule.bulkInsert('ActivityGrade', AnonAndModIDAdder.ACTIVITY_GRADE_COL_NAMES, rowBatch)
    
    def getResourceDisplayName(self, moduleID):
        '''
        Resolve a module id into a module name by delegating
        to Utils.getModuleNameFromID().

        @param moduleID: module id as found in column module_id
        @return: whatever Utils.getModuleNameFromID() returns for moduleID
        '''
        return Utils.getModuleNameFromID(moduleID)


    def parseStateJSON(self, jsonStateStr, srcTableName='courseware_studentmodule'):
        '''
        Given the 'state' column from a courseware_studentmodule
        column, return a 3-tuple: (plusMinusStr, answersArray, numAttempts)
        The plusMinusStr will be a string of '+' and '-'. A
        plus means that the problem solution part of an assignment
        submission was correct; a '-' means it was incorrect. The
        plus/minus indicators are arranged in the order of the problem
        subparts; like '++-' for a three-part problem in which the student
        got the first two correct, the last one incorrect.
        
        The answersArray will be an array of answers to the corresponding
        problems, like ['choice_0', 'choice_1'].
        
        Input for a problem solution with two parts looks like this::
            {   		           
    		 "correct_map": {
    		   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": {
    		     "hint": "",
    		     "hintmode": null,
    		     "correctness": "correct",
    		     "npoints": null,
    		     "msg": "",
    		     "queuestate": null
    		   },
    		   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": {
    		     "hint": "",
    		     "hintmode": null,
    		     "correctness": "correct",
    		     "npoints": null,
    		     "msg": "",
    		     "queuestate": null
    		   }
    		 },
    		 "input_state": {
    		   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": {
    		     
    		   },
    		   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": {
    		     
    		   }
    		 },
    		 "attempts": 3,
    		 "seed": 1,
    		 "done": true,
    		 "student_answers": {
    		   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": "choice_3",
    		   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": "choice_0"
    		 }
        }   		        
        
        This structure is ugly enough even when imported into a dict
        via json.loads() that a regular expression solution is faster.
        Three regexp are used:
          - SOLUTION_RESULT_PATTERN  = re.compile(r'[^"]*correctness": "([^"]*)')
              looks for the correctness entries: 'correct', 'incorrect'.
              First the regex throws away front parts the JSON that do not consist
              of 'correctness": '. That's the '"[^"]*correctness": "' par
              of the regex
              Next, a capture group grabs all letters that are not a double
              quote. That's the '([^"]*)' part of the regex. Those capture
              groups will contain the words 'correct' or 'incorrect'.
               
          - SOLUTION_ANSWERS_PATTERN = re.compile(r'[^:]*: "([^"]*)"')
              looks for the answers themselves: 'choice_0', etc. This pattern
              assumes that we first cut off from the JSON all the front part up
              to 'student_answers":'. The regex operates over the rest:
              The '[^:]*: "' skips over all text up to the next colon, followed
              by a space and opening double quote. The capture group grabs the 
              answer, as in 'choice_0'. 
        
        @param jsonStateStr:
        @type jsonStateStr:
        @param srcTableName:
        @type srcTableName:
        @return: plus/minus string, array of participant's answers, number of attempts. 
               If number of attempts is -1 the row was not a problem statement,
               or number of attempts was otherwise unavailable.
        @rtype: (string, [string], int)
        '''
        successResults = ''
        # The following badAnswers array is filled with
        # just the wrong answers. It's maintained, but
        # not currently returned, b/c users didn't feel
        # they needed it.
        badAnswers = [] 
        answers = []
        numAttempts = -1
        
        # Many state entries are not student problem result 
        # submissions, but of the form "{'postion': 4}".
        # Weed those out:
        if jsonStateStr.find('correct_map') == -1:
            #return (successResults, badAnswers, numAttempts)
            return (successResults, answers, numAttempts)
        
        # Get the ['correct','incorrect',...] array;
        # we'll use it later on:
        allSolutionResults = AnonAndModIDAdder.SOLUTION_RESULT_PATTERN.findall(jsonStateStr)
        
        
        # Next, get all the answers themselves.
        # Chop off all the JSON up to 'student_answers":':
        chopTxtMarker = 'student_answers":'
        chopPos = jsonStateStr.find(chopTxtMarker)
        if chopPos == -1:
            # Couldn't find the student answers; fine;
            #return (successResults, badAnswers, numAttempts)
            return (successResults, answers, numAttempts)
        else:
            # Get left with str starting at '{' in
            # "student_answers": {
            #   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_2_1": "choice_3",
            #   "i4x-Medicine-HRP258-problem-8dd11b4339884ab78bc844ce45847141_3_1": "choice_0"
            restJSON = jsonStateStr[chopPos+len(chopTxtMarker):]
            # ... and put the regex to work:
            answers = AnonAndModIDAdder.SOLUTION_ANSWERS_PATTERN.findall(restJSON)
        
        # Find number of attempts:
        # Find '"attempts": 3,...':
        chopTxtMarker = '"attempts": '
        chopPos = jsonStateStr.find(chopTxtMarker)
        if chopPos > 0:
            upToNum = jsonStateStr[chopPos+len(chopTxtMarker):]
            try:
                # The 'str' part of 'str(upToNum)' is needed b/c
                # the JSON is unicode, and isdigit() barfs when given
                # unicode:
                numAttempts = int("".join(itertools.takewhile(str.isdigit, str(upToNum))))
            except ValueError:
                # Couldn't find the number of attempts.
                # Just punt.
                pass
            except TypeError:
                # Unicode garbage, clearly not a digit
                pass
                
        # Go through the ['correct','incorrect',...] array,
        # and take two actions: if correct, add a '+' to
        # the successResults str; if 'incorrect' then add
        # a '-' to successResults, and transfer the 'bad'
        # answer to the badAnswers array:
        
        for (i, correctness) in enumerate(allSolutionResults):
            if  correctness == 'correct':
                successResults += '+'
            else:
                successResults += '-'
                try:
                    badAnswers.append(answers[i])
                except IndexError:
                    badAnswers.append('<unknown>')

        #return (successResults, badAnswers, numAttempts)
        return (successResults, answers, numAttempts)
class UserCountryTableCreator(object):
    '''
    Creates and fills MySQL table UserCountry in database Edx.
    Each row pairs an anon_screen_name from EventXtract with the
    two-letter code, three-letter code, and name of the country
    that the IpCountryDict lookup returns for the row's ip_country.
    Rows are inserted in batches of INSERT_BULK_SIZE.
    '''

    DEST_TABLE = 'UserCountry'
    # Number of anon ids-country-2-letter-3-letter
    # tuples to accumulate before inserting into
    # UserCountry:
    INSERT_BULK_SIZE = 15000

    def __init__(self, user, pwd):
        '''
        Connect to database Edx, then drop and re-create an
        empty UserCountry table.

        @param user: MySQL user under which to log in
        @type user: String
        @param pwd: MySQL password for that user
        @type pwd: String
        '''
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        # Start from scratch: any existing UserCountry table
        # is dropped, and an empty one is created:
        createCmd = '''CREATE TABLE UserCountry (
                         anon_screen_name varchar(40) NOT NULL DEFAULT "",
                         two_letter_country varchar(2) NOT NULL DEFAULT "",
                         three_letter_country varchar(3) NOT NULL DEFAULT "",
                         country varchar(255) NOT NULL DEFAULT ""
                         ) ENGINE=MyISAM;
                         '''
        self.db.dropTable('UserCountry')
        print("Creating table UserCountry...")
        self.db.execute(createCmd)
        print("Done creating table UserCountry.")

    def fillTable(self):
        '''
        Look up the country for every distinct (anon_screen_name,
        ip_country) pair in EventXtract, and insert the results into
        UserCountry, INSERT_BULK_SIZE rows at a time. Pairs whose
        country cannot be looked up are entered with the placeholder
        values 'XX', 'XXX', and 'Not in lookup tbl'. Progress is
        printed to stdout. Exits the process with status 1 if
        a bulk insert reports errors.
        '''
        query = "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"
        query_res_it = self.db.query(query)
        done = False
        # Order of columns for insert:
        colNameTuple = ('anon_screen_name', 'two_letter_country',
                        'three_letter_country', 'country')

        while not done:
            values = []
            print("%s: Starting one set of %s lookups..." %\
                  (str(datetime.datetime.today()),
                   UserCountryTableCreator.INSERT_BULK_SIZE))
            for _ in range(UserCountryTableCreator.INSERT_BULK_SIZE):
                try:
                    (anon_screen_name, ip3LetterCountry) = query_res_it.next()
                except StopIteration:
                    done = True
                    break
                # Try translating:
                try:
                    (twoLetterCode, threeLetterCode,
                     country) = self.ipCountryXlater.getBy3LetterCode(
                         ip3LetterCountry)
                except (ValueError, TypeError, KeyError):
                    twoLetterCode = 'XX'
                    threeLetterCode = 'XXX'
                    country = 'Not in lookup tbl'
                values.append((
                    '%s' % anon_screen_name,
                    '%s' % twoLetterCode,
                    '%s' % threeLetterCode,
                    '%s' % country,
                ))

            if not values:
                # The query result ran dry exactly at a batch boundary,
                # or held no rows at all. There is nothing to insert,
                # so don't bother the database with an empty batch.
                break

            # Insert this chunk into the UserCountry table
            print("%s: Inserting %s rows into UserCountry table..." %
                  (str(datetime.datetime.today()), len(values)))
            (errors,
             warnings) = self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE,
                                            colNameTuple, values)
            if errors is not None:
                print('Error(s) during UserCountry insert: %s' % errors)
                sys.exit(1)
            if warnings is not None:
                print('Warning(s) during UserCountry insert: %s' % warnings)

            print("%s: Done inserting %s rows into UserCountry table..." %
                  (str(datetime.datetime.today()), len(values)))
            # ... and loop to process the next INSERT_BULK_SIZE batch

    def makeIndex(self):
        '''
        Index UserCountry on anon_screen_name and on three_letter_country
        via the createIndexIfNotExists stored procedure.
        '''
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);"
        )
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);"
        )

    def close(self):
        '''
        Close the database connection.
        '''
        self.db.close()
class TestAddAnonToActivityGrade(unittest.TestCase):
    '''
    Runs AnonAndModIDAdder against a three-row StudentmoduleExcerpt
    table in MySQL database unittest, and checks the rows that end
    up in table ActivityGrade.
    '''

    # Schema shared by StudentmoduleExcerpt and ActivityGrade.
    # Built from a sequence of pairs: an OrderedDict initialized
    # from a dict literal only keeps the column order if plain
    # dicts are themselves ordered.
    studentmoduleExcerptSchema = OrderedDict([
                ('activity_grade_id', 'INT'),
                ('student_id', 'INT'),
                ('course_display_name', 'VARCHAR(255)'),
                ('grade', 'VARCHAR(5)'),
                ('max_grade', 'DOUBLE'),
                ('percent_grade', 'DOUBLE'),
                ('parts_correctness', 'VARCHAR(255)'),
                ('answers', 'VARCHAR(255)'),
                ('num_attempts', 'INT'),
                ('first_submit', 'DATETIME'),
                ('last_submit', 'DATETIME'),
                ('module_type', 'VARCHAR(255)'),
                ('anon_screen_name', 'VARCHAR(40)'),
                ('resource_display_name', 'VARCHAR(255)'),
                ('module_id', 'VARCHAR(255)'),
                ])
    
    # Column names in schema order:
    studentmoduleExcerptColNames = list(studentmoduleExcerptSchema.keys())

    # Samples of the JSON found in the state column: a one-part
    # problem answered correctly, a problem without answers, and
    # a non-problem entry:
    state1 = ' {"correct_map": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {"hint": "", "hintmode": null, "correctness": "correct", "npoints": null, "msg": "", "queuestate": null}}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}, "attempts": 1, "seed": 1, "done": true, "student_answers": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": "choice_1"}} '
    state2 = '{"correct_map": {}, "seed": 1, "student_answers": {}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}}'
    state3 = '{"position": 1}'
    
    modid1 = 'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'
    modid2 = 'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'
    modid3 = 'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'
    studentmoduleExcerptValues = \
                [
                [0,1,'myCourse',3,10,-1.0,state1,'',-1,'2014-01-10 04:10:45','2014-02-10 10:14:40','modtype1','','',modid1],
                [1,2,'myCourse',5,10,-1.0,state2,'',-1,'2014-01-10 11:30:23','2014-02-10 14:30:12','modtype2','','',modid2],
                [2,3,'yourCourse',8,10,-1.0,state3,'',-1,'2014-01-10 18:34:12','2014-02-10 19:10:33','modtype2','','',modid3]
               ]

    def setUp(self):
        '''
        Create and fill table StudentmoduleExcerpt in database
        unittest, and make sure an empty ActivityGrade table exists.
        '''
        self.allColNames = ','.join(TestAddAnonToActivityGrade.studentmoduleExcerptColNames)
        
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        self.db.dropTable('StudentmoduleExcerpt')
        self.db.createTable('StudentmoduleExcerpt', 
                            TestAddAnonToActivityGrade.studentmoduleExcerptSchema,
                            temporary=False)
        self.db.bulkInsert('StudentmoduleExcerpt',
                           TestAddAnonToActivityGrade.studentmoduleExcerptColNames,
                           TestAddAnonToActivityGrade.studentmoduleExcerptValues)
        self.db.createTable('ActivityGrade', TestAddAnonToActivityGrade.studentmoduleExcerptSchema)
        # Make sure there isn't left over content (if the table existed):
        self.db.truncateTable('ActivityGrade')
        self.db.close()

    def tearDown(self):
        '''
        Open and close a connection to database unittest. The
        test tables are left in place.
        '''
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        # Can't drop tables: hangs
        #self.db.dropTable('StudentmoduleExcerpt')
        #self.db.dropTable('ActivityGrade')
        self.db.close()
        
        
    def testAddAnonToActivityTable(self):
        '''
        Run AnonAndModIDAdder over the test table, and compare
        the resulting ActivityGrade rows with the expected ones.
        '''
        expectedRows = [
            (0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1, datetime.datetime(2014, 1, 10, 4, 10, 45), datetime.datetime(2014, 2, 10, 10, 14, 40), 'modtype1', '', 'Guided Walkthrough', 'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'),
            (1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1, datetime.datetime(2014, 1, 10, 11, 30, 23), datetime.datetime(2014, 2, 10, 14, 30, 12), 'modtype2', '', 'Evaluation', 'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'),
            (2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1, datetime.datetime(2014, 1, 10, 18, 34, 12), datetime.datetime(2014, 2, 10, 19, 10, 33), 'modtype2', '', 'Introduction', 'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'),
        ]
        # Modify the fake courseware_studentmodule excerpt
        # to add anon_screen_name, compute plusses/minusses, 
        # compute grade percentage, etc. This happens outside the
        # try block, so that a failure here does not end up closing
        # the connection that setUp() already closed:
        AnonAndModIDAdder('unittest', '', db='unittest')
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        try:
            actualRows = list(self.db.query('SELECT %s FROM ActivityGrade;' % self.allColNames))
            for (expectedRow, actualRow) in zip(expectedRows, actualRows):
                self.assertEqual(expectedRow, actualRow)
            # Without this check the test would pass vacuously
            # if ActivityGrade came back empty:
            self.assertEqual(len(expectedRows), len(actualRows))
        finally:
            self.db.close()
Example #7
0
    def setUpClass(cls):
        '''
        Check that the local MySQL server is usable for the tests,
        and record the outcome in class variables:

           - TestPymysqlUtils.env_ok: False if user 'unittest' cannot
             connect, or lacks one of the needed grants
           - TestPymysqlUtils.err_msg: instructions for fixing
             the environment if env_ok is False
           - TestPymysqlUtils.major/minor: MySQL version to test
             against; set to 5 and 7 if the server's version is
             unavailable or not one of the known versions.

        @raise OSError: if retrieving the MySQL version fails
        '''
        # Ensure that a user unittest with the proper
        # permissions exists in the db:
        TestPymysqlUtils.env_ok = True
        TestPymysqlUtils.err_msg = ''
        try:
            needed_grants = ['SELECT', 'INSERT', 'UPDATE', 
                             'DELETE', 'CREATE', 'CREATE TEMPORARY TABLES', 
                             'DROP', 'ALTER']
            mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
            grant_query = 'SHOW GRANTS FOR unittest@localhost'
            query_it = mysqldb.query(grant_query)
            # First row of the SHOW GRANTS response should be
            # one of:
            first_grants = ["GRANT USAGE ON *.* TO 'unittest'@'localhost'",
                            "GRANT USAGE ON *.* TO `unittest`@`localhost`"
                            ]
            # Second row depends on the order in which the 
            # grants were provided. The row will look something
            # like:
            #   GRANT SELECT, INSERT, UPDATE, DELETE, ..., CREATE, DROP, ALTER ON `unittest`.* TO 'unittest'@'localhost'
            # Verify:
            usage_grant = query_it.next()
            if usage_grant not in first_grants:
                TestPymysqlUtils.err_msg = '''
                    User 'unittest' is missing USAGE grant needed to run the tests.
                    Also need this in your MySQL: 
                    
                          %s
                    ''' % ('GRANT %s ON unittest.* TO unittest@localhost' % ','.join(needed_grants))
                TestPymysqlUtils.env_ok = False
                return
            grants_str = query_it.next()
            for needed_grant in needed_grants:
                if grants_str.find(needed_grant) == -1:
                    TestPymysqlUtils.err_msg = '''
                    User 'unittest' does not have the '%s' permission needed to run the tests.
                    Need this in your MySQL:
                    
                        %s
                    ''' % (needed_grant, 'GRANT %s ON unittest.* TO unittest@localhost;' % ','.join(needed_grants))
                    TestPymysqlUtils.env_ok = False
                    return  
        except (ValueError,RuntimeError):
            TestPymysqlUtils.err_msg = '''
               For unit testing, localhost MySQL server must have 
               user 'unittest' without password, and a database 
               called 'unittest'. To create these prerequisites 
               in MySQL:
               
                    CREATE USER unittest@localhost;
                    CREATE DATABASE unittest; 
               This user needs permissions:
                    %s 
               ''' % ('GRANT %s ON unittest.* TO unittest@localhost;' % ','.join(needed_grants))
            TestPymysqlUtils.env_ok = False

        # Check MySQL version:
        try:
            (major, minor) = TestPymysqlUtils.get_mysql_version()
        except Exception as e:
            raise OSError('Could not get mysql version number: %s' % str(e))
            
        if major is None:
            print('Warning: MySQL version number not found; testing as if V5.7')
            TestPymysqlUtils.major = 5
            TestPymysqlUtils.minor = 7
        else:
            TestPymysqlUtils.major = major
            TestPymysqlUtils.minor = minor
            known_versions = [(5,6), (5,7), (8,0)]
            if (major,minor) not in known_versions:
                # The version numbers must actually be passed to the
                # format string, else the literal '%s.%s' gets printed:
                print('Warning: MySQL version is %s.%s; but testing as if V5.7' % (major, minor))
                TestPymysqlUtils.major = 5
                TestPymysqlUtils.minor = 7
Example #8
0
class TestPymysqlUtils(unittest.TestCase):
    '''
    Tests pymysql_utils.    
    '''

    @classmethod
    def setUpClass(cls):
        '''
        One-time check that the local MySQL server can support
        the tests, followed by detection of the MySQL version.

        Connects as user 'unittest' to database 'unittest' on
        localhost, and inspects the first two rows returned by
        SHOW GRANTS. Results are left in class variables:

          - TestPymysqlUtils.env_ok: set to False if the connection
            or query raised ValueError/RuntimeError, if the first
            grants row is not one of the recognized USAGE grants,
            or if a needed grant is missing from the second row.
          - TestPymysqlUtils.err_msg: setup advice that goes with
            env_ok == False; setUp() raises it as a RuntimeError.
          - TestPymysqlUtils.major, TestPymysqlUtils.minor: the
            MySQL version the tests assume. Set to 5.7 if the
            version is not found, or is not a known version.
            NOTE: not set when a grants problem causes an early
            return.

        @raise OSError: if get_mysql_version() raises.
        '''
        # Ensure that a user unittest with the proper
        # permissions exists in the db:
        TestPymysqlUtils.env_ok = True
        TestPymysqlUtils.err_msg = ''
        needed_grants = ['SELECT', 'INSERT', 'UPDATE', 
                         'DELETE', 'CREATE', 'CREATE TEMPORARY TABLES', 
                         'DROP', 'ALTER']
        try:
            mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
            grant_query = 'SHOW GRANTS FOR unittest@localhost'
            query_it = mysqldb.query(grant_query)
            # First row of the SHOW GRANTS response should be
            # one of:
            first_grants = ["GRANT USAGE ON *.* TO 'unittest'@'localhost'",
                            "GRANT USAGE ON *.* TO `unittest`@`localhost`"
                            ]
            # Second row depends on the order in which the 
            # grants were provided. The row will look something
            # like:
            #   GRANT SELECT, INSERT, UPDATE, DELETE, ..., CREATE, DROP, ALTER ON `unittest`.* TO 'unittest'@'localhost'
            # Verify:
            usage_grant = query_it.next()
            if usage_grant not in first_grants:
                TestPymysqlUtils.err_msg = '''
                    User 'unittest' is missing USAGE grant needed to run the tests.
                    Also need this in your MySQL: 
                    
                          %s
                    ''' % ('GRANT %s ON unittest.* TO unittest@localhost' % ','.join(needed_grants))
                TestPymysqlUtils.env_ok = False
                return
            grants_str = query_it.next()
            for needed_grant in needed_grants:
                if grants_str.find(needed_grant) == -1:
                    TestPymysqlUtils.err_msg = '''
                    User 'unittest' does not have the '%s' permission needed to run the tests.
                    Need this in your MySQL:
                    
                        %s
                    ''' % (needed_grant, 'GRANT %s ON unittest.* TO unittest@localhost;' % ','.join(needed_grants))
                    TestPymysqlUtils.env_ok = False
                    return  
        except (ValueError,RuntimeError):
            TestPymysqlUtils.err_msg = '''
               For unit testing, localhost MySQL server must have 
               user 'unittest' without password, and a database 
               called 'unittest'. To create these prerequisites 
               in MySQL:
               
                    CREATE USER unittest@localhost;
                    CREATE DATABASE unittest; 
               This user needs permissions:
                    %s 
               ''' % ('GRANT %s ON unittest.* TO unittest@localhost;' % ','.join(needed_grants))
            TestPymysqlUtils.env_ok = False

        # Check MySQL version:
        try:
            (major, minor) = TestPymysqlUtils.get_mysql_version()
        except Exception as e:
            raise OSError('Could not get mysql version number: %s' % str(e))
            
        if major is None:
            print('Warning: MySQL version number not found; testing as if V5.7')
            TestPymysqlUtils.major = 5
            TestPymysqlUtils.minor = 7
        else:
            TestPymysqlUtils.major = major
            TestPymysqlUtils.minor = minor
            known_versions = [(5,6), (5,7), (8,0)]
            if (major,minor) not in known_versions:
                # Report the version that was actually found:
                print('Warning: MySQL version is %s.%s; but testing as if V5.7' % (major, minor))
                TestPymysqlUtils.major = 5
                TestPymysqlUtils.minor = 7
        

    def setUp(self):
        '''
        Open a fresh connection to the 'unittest' database for
        the upcoming test, and cache in self.mysql_ge_5_7 whether
        the recorded MySQL version is 5.7 and up within major
        version 5, or major version 8 and up.

        @raise RuntimeError: if setUpClass() flagged the MySQL
            environment as unusable. The message is the advice
            stored in TestPymysqlUtils.err_msg.
        '''
        if not TestPymysqlUtils.env_ok:
            raise RuntimeError(TestPymysqlUtils.err_msg)

        try:
            self.mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
        except ValueError as e:
            self.fail(str(e) + " (For unit testing, localhost MySQL server must have user 'unittest' without password, and a database called 'unittest')")

        # Make MySQL version more convenient to check:
        major_version = TestPymysqlUtils.major
        minor_version = TestPymysqlUtils.minor
        self.mysql_ge_5_7 = (major_version >= 8) or \
                            (major_version == 5 and minor_version >= 7)


    def tearDown(self):
        '''
        Clean up after a test: if the connection is still open,
        drop table 'unittest', reset the password of user
        unittest to empty, and close the connection. Does
        nothing if the test already closed the connection.
        '''
        db = self.mysqldb
        if not db.isOpen():
            return

        db.dropTable('unittest')
        # Make sure the test didn't leave a password
        # set for user unittest in the db:
        db.execute("SET PASSWORD FOR unittest@localhost = '';")
        db.close()

    # ----------------------- Table Manipulation -------------------------

    #-------------------------
    # Creating and Dropping Tables 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testCreateAndDropTable(self):
        # Creates a permanent four-column table, verifies the
        # column names/types via information_schema, then drops
        # the table and verifies that it is gone.
        
        # 'col5' : 'JSON' is left out: only works MySQL 5.7 and up.
        tbl_schema = dict(col1='INT',
                          col2='varchar(255)',
                          col3='FLOAT',
                          col4='TEXT')
        self.mysqldb.createTable('myTbl', tbl_schema, temporary=False)
        
        # The query delivers the (name, type) pairs in some
        # order; sort before comparing:
        col_info_query = '''SELECT COLUMN_NAME,COLUMN_TYPE 
                                      FROM information_schema.columns 
                                    WHERE TABLE_SCHEMA = 'unittest' 
                                      AND TABLE_NAME = 'myTbl';
                                      '''
        col_info = list(self.mysqldb.query(col_info_query))
        col_info.sort()
        expected_col_info = [('col1', 'int(11)'), 
                             ('col2', 'varchar(255)'), 
                             ('col3', 'float'), 
                             ('col4', 'text')]
        self.assertEqual(col_info, expected_col_info)
        
        self.mysqldb.dropTable('myTbl')
        
        # Query mysql information schema to check that the
        # table is gone. Use a raw cursor to test independently
        # from the pymysql_utils query() method:
        raw_cursor = self.mysqldb.connection.cursor()
        tbl_exists_query = '''
                  SELECT table_name 
                    FROM information_schema.tables 
                   WHERE table_schema = 'unittest' 
                     AND table_name = 'myTbl';
                     '''
        raw_cursor.execute(tbl_exists_query)
        self.assertEqual(raw_cursor.rowcount, 0)
        raw_cursor.close()

    #-------------------------
    # Creating Temporary Tables 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testCreateTempTable(self):
        # Creates a temporary table, checks that it can be
        # described, then reconnects and checks that describing
        # the table raises ValueError in the new session.
        
        # 'col5' : 'JSON' is left out: only works MySQL 5.7 and up.
        tbl_schema = dict(col1='INT',
                          col2='varchar(255)',
                          col3='FLOAT',
                          col4='TEXT')
        self.mysqldb.createTable('myTbl', tbl_schema, temporary=True)
        
        # NOTE: can't use a query to mysql.informationschema
        # for the existence checks below, b/c temp tables
        # aren't listed there.
        describe_cmd = 'DESC myTbl'
        
        try:
            # Returns some tuple; its content is irrelevant,
            # as long as the command doesn't fail:
            self.mysqldb.query(describe_cmd).next()
        except Exception:
            self.fail('Temporary table not found after creation.')
        
        # Start a new session, which should remove the table:
        self.mysqldb.close()
        try:
            self.mysqldb = MySQLDB(host='localhost', port=3306, user='******', db='unittest')
        except ValueError as e:
            self.fail(str(e) + "Could not re-establish MySQL connection.")

        # Describing the table must now raise ValueError:
        try:
            self.mysqldb.query(describe_cmd).next()
        except ValueError:
            pass
        else:
            self.fail("Temporary table did not disappear with session exit.")


    #-------------------------
    # Table Truncation 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testTruncate(self):
        # Fills table 'unittest' with a known number of rows,
        # truncates it, and verifies that no rows remain. Row
        # counts are taken via a raw cursor.
        
        select_all = 'SELECT * FROM unittest;'
        
        # Initial test db with known num of rows:
        expected_num_rows = self.buildSmallDb()
        raw_cursor = self.mysqldb.connection.cursor()
        raw_cursor.execute(select_all)
        self.assertEqual(raw_cursor.rowcount, expected_num_rows)
        
        self.mysqldb.truncateTable('unittest')
        
        raw_cursor.execute(select_all)
        self.assertEqual(raw_cursor.rowcount, 0)
        raw_cursor.close()

    # ----------------------- Insertion and Update -------------------------
    
    #-------------------------
    # Insert One Row 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testInsert(self):
        # Inserts a row that only provides col1, then a row
        # with an explicit None for col1, and checks both rows.
        
        self.mysqldb.createTable('unittest', 
                                 OrderedDict([('col1', 'INT'), ('col2', 'TEXT')]))
        
        # Row with col1 only; col2 is expected to come back as None:
        self.mysqldb.insert('unittest', OrderedDict([('col1', 10)]))
        first_row = self.mysqldb.query("SELECT * FROM unittest").next()
        self.assertEqual((10, None), first_row)
        
        # Insert row with an explicit None:
        self.mysqldb.insert('unittest', OrderedDict([('col1', None)]))
        
        raw_cursor = self.mysqldb.connection.cursor()
        raw_cursor.execute('SELECT col1 FROM unittest')
        # Skip over the first row (col1 == 10):
        raw_cursor.fetchone()
        # col1 of the row just added (the 2nd row):
        second_row = raw_cursor.fetchone()
        self.assertEqual(second_row, (None,))
        raw_cursor.close()
 
    #-------------------------
    # Insert One Row With Error 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testInsertWithError(self):
        # Checks the (errors, warnings) pair returned by
        # insert(): both are expected to be None for a
        # successful single-row insert.
        
        self.mysqldb.createTable('unittest', 
                                 OrderedDict([('col1', 'INT'), ('col2', 'TEXT')]))
        insert_result = self.mysqldb.insert('unittest', OrderedDict([('col1', 10)]))
        (errors, warnings) = insert_result
        self.assertIsNone(errors)
        self.assertIsNone(warnings)
        
        inserted_row = self.mysqldb.query("SELECT * FROM unittest").next()
        self.assertEqual((10, None), inserted_row)

    
    #-------------------------
    # Insert Several Columns 
    #--------------

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testInsertSeveralColumns(self):
        # Inserts one row that provides values for both
        # columns, and reads the row back.
        
        self.mysqldb.createTable('unittest', 
                                 OrderedDict([('col1', 'INT'), ('col2', 'TEXT')]))
        row_values = OrderedDict([('col1', 10), ('col2', 'My Poem')])
        self.mysqldb.insert('unittest', row_values)
        
        row_read_back = self.mysqldb.query("SELECT * FROM unittest").next()
        self.assertEqual((10, 'My Poem'), row_read_back)
    

    #-------------------------
    # Bulk Insertion 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testBulkInsert(self):
        # Exercises bulkInsert() against a table with a primary
        # key: default handling of a duplicate key, then the
        # REPLACE and IGNORE duplicate key actions, then NULL
        # values, and finally a column count mismatch that must
        # be reported in the returned errors.
        #
        # Called twice: once by the unittest engine,
        # and again by testWithMySQLPassword() to 
        # exercise the pwd-bound branch in bulkInsert().
      
        # Build test db. (buildSmallDb() inserts these rows
        # through a raw cursor, not through bulkInsert()):
        #                  col1   col2
        #                   10,  'col1'
        #                   20,  'col2'
        #                   30,  'col3'
        self.buildSmallDb()
        self.mysqldb.execute('ALTER TABLE unittest ADD PRIMARY KEY(col1)')
        
        # Provoke a MySQL error: duplicate primary key (i.e. 10): 
        # Add another row:  10,  'newCol1':
        colNames = ['col1', 'col2']
        colValues = [(10, 'newCol1')]
        
        (errors, warnings) = self.mysqldb.bulkInsert('unittest', colNames, colValues) #@UnusedVariable
        
        # For MySQL 5.7, expect something like:
        #    ((u'Warning', 1062L, u"Duplicate entry '10' for key 'PRIMARY'"),)
        # MySQL 5.6 just skips: 
        
        if self.mysql_ge_5_7:
            self.assertEqual(len(warnings), 1)
        else:
            self.assertIsNone(warnings)
            
        # First tuple should still be (10, 'col1'). The
        # single-column query delivers the bare column value:
        self.assertEqual('col1', self.mysqldb.query('SELECT col2 FROM unittest WHERE col1 = 10').next())
        
        # Try update again, but with replacement:
        (errors, warnings) = self.mysqldb.bulkInsert('unittest', colNames, colValues, onDupKey=DupKeyAction.REPLACE) #@UnusedVariable
        self.assertIsNone(warnings)
        # Now row should have changed:
        self.assertEqual('newCol1', self.mysqldb.query('SELECT col2 FROM unittest WHERE col1 = 10').next())
        
        # Insert a row with duplicate key, specifying IGNORE:
        colNames = ['col1', 'col2']
        colValues = [(10, 'newCol2')]
        (errors, warnings) = self.mysqldb.bulkInsert('unittest', colNames, colValues, onDupKey=DupKeyAction.IGNORE) #@UnusedVariable
        # Even when ignoring dup keys, MySQL 5.7/8.x issue a warning
        # for each dup key:
        
        if self.mysql_ge_5_7:
            self.assertEqual(len(warnings), 1)
        else:
            self.assertIsNone(warnings)
        
        # The row must be unchanged from the REPLACE step above:
        self.assertEqual('newCol1', self.mysqldb.query('SELECT col2 FROM unittest WHERE col1 = 10').next())
        
        # Insertions that include NULL values:
        colValues = [(40, None), (50, None)]
        (errors, warnings) = self.mysqldb.bulkInsert('unittest', colNames, colValues) #@UnusedVariable
        self.assertEqual(None, self.mysqldb.query('SELECT col2 FROM unittest WHERE col1 = 40').next())
        self.assertEqual(None, self.mysqldb.query('SELECT col2 FROM unittest WHERE col1 = 50').next())
        
        # Provoke an error: three column names, but the table
        # and the value tuple only have two columns:
        colNames = ['col1', 'col2', 'col3']
        colValues = [(10, 'newCol2')]
        (errors, warnings) = self.mysqldb.bulkInsert('unittest', colNames, colValues, onDupKey=DupKeyAction.IGNORE) #@UnusedVariable
        self.assertEqual(len(errors), 1)
        
    #-------------------------
    # Updates 
    #--------------

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testUpdate(self):
        # Exercises update(): with a WHERE condition, without
        # one (all rows), with Python None standing in for
        # MySQL NULL, and with a non-existent column, which
        # must be reported in the returned errors. Results are
        # checked through a raw cursor.
      
        num_rows = self.buildSmallDb()
        cursor = self.mysqldb.connection.cursor()
        
        # Initially, col2 of row0 must be 'col1':
        cursor.execute('SELECT col2 FROM unittest WHERE col1 = 10')
        col2_row_zero = cursor.fetchone()
        self.assertTupleEqual(col2_row_zero, ('col1',))
        
        # Change col1 from 10 to 40 in the one matching row:
        self.mysqldb.update('unittest', 'col1', 40, fromCondition='col1 = 10')
        
        # Now no col1 with value 10 should exist:
        cursor.execute('SELECT col2 FROM unittest WHERE col1 = 10')
        self.assertEqual(cursor.rowcount, 0)
        # But a row with col1 == 40 should have col2 == 'col1':
        cursor.execute('SELECT col2 FROM unittest WHERE col1 = 40')
        col2_res = cursor.fetchone()
        self.assertTupleEqual(col2_res, ('col1',))
        
        # Update *all* rows in one column:
        self.mysqldb.update('unittest', 'col1', 0)
        cursor.execute('SELECT count(*) FROM unittest WHERE col1 = 0')
        res_count = cursor.fetchone()
        self.assertTupleEqual(res_count, (num_rows,))
        
        # Update with a MySQL NULL value by using Python None
        # for input and output:
        self.mysqldb.update('unittest', 'col1', None)
        cursor.execute('SELECT count(*) FROM unittest WHERE col1 is %s', (None,))
        res_count = cursor.fetchone()
        self.assertTupleEqual(res_count, (num_rows,))
        
        # Update with a MySQL NULL value by using Python None
        # with WHERE clause: only set col1 to NULL where col2 = 'col2',
        # i.e. in the 2nd row. Start over with a fresh table:
        
        num_rows = self.buildSmallDb()

        # The condition is passed positionally here:
        self.mysqldb.update('unittest', 'col1', None, "col2 = 'col2'")
        cursor.execute('SELECT count(*) FROM unittest WHERE col1 is %s', (None,))
        res_count = cursor.fetchone()
        self.assertTupleEqual(res_count, (1,))
                        
        # Provoke an error: the table has no column 'col6':
        (errors,warnings) = self.mysqldb.update('unittest', 'col6', 40, fromCondition='col1 = 10') #@UnusedVariable
        self.assertEqual(len(errors), 1)
        
        cursor.close()
    
    # ----------------------- Queries -------------------------         

    #-------------------------
    # Query With Result Iteration 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testQueryIterator(self):
        # Iterates over query results twice: first with the
        # default cursor, which delivers tuples, then with
        # the dict cursor, which delivers one dict per row.
        
        self.buildSmallDb()
        select_cmd = 'SELECT col1,col2 FROM unittest'

        # Only the first three rows are checked:
        expected_rows = [(10, 'col1'), (20, 'col2'), (30, 'col3')]
        for rowNum, result in enumerate(self.mysqldb.query(select_cmd)):
            if rowNum < len(expected_rows):
                self.assertEqual(expected_rows[rowNum], result)

        # Test the dict cursor
        self.mysqldb.close()
        self.mysqldb = MySQLDB(host='localhost', user='******', db='unittest', cursor_class=Cursors.DICT)
        
        # Expected col2 value for each known col1 value; rows
        # with any other col1 value are only type checked:
        col2_for_col1 = {10 : 'col1', 20 : 'col2', 30 : 'col3'}
        for result in self.mysqldb.query(select_cmd):
            self.assertIsInstance(result, dict)
            col1_val = result['col1']
            if col1_val in col2_for_col1:
                self.assertEqual(result['col2'], col2_for_col1[col1_val])

    #-------------------------
    # Query Unparameterized 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testExecuteArbitraryQuery(self):
        # Sets col1 of every row to 120 through execute(),
        # then checks each value that the query delivers.
        
        self.buildSmallDb()
        self.mysqldb.execute("UPDATE unittest SET col1=120")
        
        col1_values = self.mysqldb.query('SELECT col1 FROM unittest')
        for col1_val in col1_values:
            self.assertEqual(120, col1_val)
        
    #-------------------------
    # Query Parameterized 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testExecuteArbitraryQueryParameterized(self):
        # Sets col1 of every row through executeParameterized(),
        # then checks that a non-existent column is reported
        # in the returned errors.
        
        self.buildSmallDb()
        new_col1_val = 130
        self.mysqldb.executeParameterized("UPDATE unittest SET col1=%s", (new_col1_val,))
        
        col1_values = self.mysqldb.query('SELECT col1 FROM unittest')
        for col1_val in col1_values:
            self.assertEqual(130, col1_val)
        
        # Provoke an error: the table has no column 'col10':
        update_result = self.mysqldb.executeParameterized("UPDATE unittest SET col10=%s", (new_col1_val,))
        (errors, warnings) = update_result #@UnusedVariable
        self.assertEqual(len(errors), 1)
        
    #-------------------------
    # Reading System Variables 
    #--------------

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testReadSysVariable(self):
        # The MySQL system variable @@hostname must be either
        # this machine's host name, or 'localhost'.
        
        acceptable_hostnames = [socket.gethostname(), 'localhost']
        hostname_from_mysql = self.mysqldb.query('SELECT @@hostname').next()
        self.assertIn(hostname_from_mysql, acceptable_hostnames)

    #-------------------------
    # User-Level Variables 
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testUserVariables(self):
        # A user-level variable reads as None before it is
        # set, and delivers its value afterwards.
        
        read_foo_cmd = "SELECT @foo"

        foo_before = self.mysqldb.query(read_foo_cmd).next()
        self.assertEqual(foo_before, None)
        
        self.mysqldb.execute("SET @foo = 'new value';")
        
        foo_after = self.mysqldb.query(read_foo_cmd).next()
        self.assertEqual(foo_after, 'new value')
        
        # Final assignment to the variable; note that
        # the value is the quoted string 'NULL':
        self.mysqldb.execute("SET @foo = 'NULL';")

    #-------------------------
    # testDbName 
    #--------------

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testDbName(self):
        # dbName() must report the database that setUp()
        # connected to.
        current_db_name = self.mysqldb.dbName()
        self.assertEqual(current_db_name, 'unittest')
    
            
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testWithMySQLPassword(self):
        # Gives user unittest a password, checks that login
        # without the password raises ValueError, then logs in
        # with the password, runs a test query, and re-runs
        # the bulk insert test. The password is removed again
        # at the end, whether or not the checks succeeded.
        
        # The SET PASSWORD syntax depends on the MySQL version:
        if self.mysql_ge_5_7:
            set_pwd_cmd    = "SET PASSWORD FOR unittest@localhost = 'foobar'"
            remove_pwd_cmd = "SET PASSWORD FOR unittest@localhost = ''"
        else:
            set_pwd_cmd    = "SET PASSWORD FOR unittest@localhost = PASSWORD('foobar')"
            remove_pwd_cmd = "SET PASSWORD FOR unittest@localhost = PASSWORD('')"
        
        try:
            # Set a password for the unittest user:
            self.mysqldb.execute(set_pwd_cmd)
            self.mysqldb.close()
            
            # We should be unable to log in without a pwd:
            with self.assertRaises(ValueError):
                self.mysqldb = MySQLDB(host='localhost', user='******', db='unittest')
                
            # Open new pymysql_db.MySQLDb instance, supplying pwd: 
            self.mysqldb = MySQLDB(host='localhost', user='******', passwd='foobar', db='unittest')
            
            # Do a test query:
            self.buildSmallDb()
            col2_val = self.mysqldb.query("SELECT col2 FROM unittest WHERE col1 = 10;").next()
            self.assertEqual(col2_val, 'col1')
            
            # Bulk insert is also different for pwd vs. none:
            self.testBulkInsert()
        finally:
            # Make sure to remove the pwd from user unittest,
            # so that other tests will run successfully:
            self.mysqldb.execute(remove_pwd_cmd)
            
    #-------------------------
    # testResultCount 
    #--------------
            
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testResultCount(self):
        # After a query was issued, result_count() must
        # deliver the number of result rows when handed
        # the query string.
        
        self.buildSmallDb()
        select_all_cmd = 'SELECT * FROM unittest'
        # The result iterator itself is not needed:
        self.mysqldb.query(select_all_cmd)
        num_results = self.mysqldb.result_count(select_all_cmd)
        self.assertEqual(num_results, 3)
    
    
    #-------------------------
    # testInterleavedQueries
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testInterleavedQueries(self):
        # Runs two queries on the same connection, and pulls
        # results from their iterators in alternation. Result
        # counts must stay correct while both queries are
        # active, and asking for the count of the exhausted
        # query must raise ValueError.
        
        self.buildSmallDb()
        # First query delivers all three rows, the second
        # one only the rows with col1 of 20 and 30:
        query_str1 = 'SELECT col2 FROM unittest ORDER BY col1'
        query_str2 = 'SELECT col2 FROM unittest WHERE col1 = 20 or col1 = 30 ORDER BY col1' 
        res_it1 = self.mysqldb.query(query_str1)
        res_it2 = self.mysqldb.query(query_str2)
        
        # Result counts are available both from the iterators,
        # and from the db object via the query string:
        self.assertEqual(res_it1.result_count(), 3)
        self.assertEqual(res_it2.result_count(), 2)
        self.assertEqual(self.mysqldb.result_count(query_str1), 3)
        self.assertEqual(self.mysqldb.result_count(query_str2), 2)
        
        self.assertEqual(res_it1.next(), 'col1')
        self.assertEqual(res_it2.next(), 'col2')
        
        # Pulling results must not change the result counts:
        self.assertEqual(res_it1.result_count(), 3)
        self.assertEqual(res_it2.result_count(), 2)
        self.assertEqual(self.mysqldb.result_count(query_str1), 3)
        self.assertEqual(self.mysqldb.result_count(query_str2), 2)
        
        self.assertEqual(res_it1.next(), 'col2')
        self.assertEqual(res_it2.next(), 'col3')
        
        self.assertEqual(res_it1.next(), 'col3')
        # Second query only had two results:
        with self.assertRaises(StopIteration): 
            res_it2.next()
        
        # Result count of the exhausted query is no longer
        # available through either route:
        with self.assertRaises(ValueError): 
            res_it2.result_count()
            
        with self.assertRaises(ValueError): 
            self.mysqldb.result_count(query_str2)
            
    #-------------------------
    # testBadParameters
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testBadParameters(self):
        # Checks that MySQLDB() rejects illegal connection
        # parameters with an exception whose message contains
        # the expected explanation.
        #
        # Each case pairs the keyword arguments for MySQLDB()
        # with the text expected within the exception message.
        # A case fails with a message naming the expected text
        # if no exception is raised at all, and with the
        # assertIn() message showing expected vs. actual text
        # if the exception message does not match.
        
        self.mysqldb.close()

        bad_param_cases = [
            # Parameters illegally set to None: 
            (dict(host=None, port=3306, user='******', db='unittest'),
             "None value(s) for ['host']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=None, user='******', db='unittest'),
             "None value(s) for ['port']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=3306, user=None, db='unittest'),
             "None value(s) for ['user']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=3306, user='******', db=None),
             "None value(s) for ['db']; none of host,port,user,passwd or db must be None"),
            (dict(host='localhost', port=3306, user='******', passwd=None, db='unittest'),
             "None value(s) for ['passwd']; none of host,port,user,passwd or db must be None"),
            (dict(host=None, port=3306, user=None, db=None),
             "None value(s) for ['host', 'db', 'user']; none of host,port,user,passwd or db must be None"),
            
            # Data types of parameters.
            # One illegal type: integer instead of string for host:
            (dict(host=10, port=3306, user='******', db='myDb'),
             "Value(s) ['host'] have bad type;host,user,passwd, and db must be strings; port must be int."),
            # Two illegal types: host and user:
            (dict(host=10, port=3306, user=30, db='myDb'),
             "Value(s) ['host', 'user'] have bad type;host,user,passwd, and db must be strings; port must be int."),
            # Port being string instead of required int:
            (dict(host='myHost', port='3306', user='******', db='myDb'),
             "Port must be an integer; was"),
            ]
        
        for (bad_kwargs, expected_msg) in bad_param_cases:
            # Keep the message, rather than the exception:
            # Python 3 unbinds the 'as' name when the except
            # clause ends.
            raised_msg = None
            try:
                MySQLDB(**bad_kwargs)
            except Exception as e:
                raised_msg = str(e)
                
            if raised_msg is None:
                self.fail('Expected ValueError exception "%s" was not raised.' % expected_msg)
            self.assertIn(expected_msg, raised_msg)

    #-------------------------
    # testIsOpen
    #--------------
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")    
    def testIsOpen(self):
        # isOpen() must be true for the connection that
        # setUp() created, and false once that connection
        # was closed.
        db = self.mysqldb
        self.assertTrue(db.isOpen())
        db.close()
        self.assertFalse(db.isOpen())

    # ----------------------- UTILITIES -------------------------
    
    #-------------------------
    # buildSmallDb 
    #--------------
    
    def buildSmallDb(self):
        '''
        (Re-)creates table 'unittest' with two columns
        and three rows, using a raw cursor. An already
        existing table of that name is dropped first.
        
        ====      ======
        col1       col2
        ====      ======
         10       'col1'
         20       'col2'
         30       'col3'
        ====      ======
        
        @return: number of rows created (always 3)
        @rtype: int
        '''
        db_connection = self.mysqldb.connection
        raw_cursor = db_connection.cursor()
        
        # Dropping runs within the no_warn_no_table() context:
        with no_warn_no_table():
            raw_cursor.execute('DROP TABLE IF EXISTS unittest')
            
        build_cmds = ('CREATE TABLE unittest (col1 INT, col2 TEXT)',
                      "INSERT INTO unittest VALUES (10, 'col1')",
                      "INSERT INTO unittest VALUES (20, 'col2')",
                      "INSERT INTO unittest VALUES (30, 'col3')",
                      )
        for build_cmd in build_cmds:
            raw_cursor.execute(build_cmd)
            
        db_connection.commit()
        raw_cursor.close()
        return 3
    
    #-------------------------
    # get_mysql_version 
    #--------------
    
    @classmethod  
    def get_mysql_version(cls):
        '''
        Return a tuple: (major, minor). 
        Example, for MySQL 5.7.15, return (5,7).
        Return (None,None) if version number not found.
        
        The version is taken from the output of running 
        the mysql client program with option --version.
        
        @return: major and minor MySQL version number,
            or (None,None)
        @rtype: (int,int) or (None,None)
        '''
        
        # Where is mysql client program?
        mysql_path = MySQLDB.find_mysql_path()
      
        # Get version string, which looks like this:
        #   'Distrib 5.7.15, for osx10.11 (x86_64) using  EditLine wrapper\n'
        version_str = subprocess.check_output([mysql_path, '--version']).decode('utf-8')
        
        # Isolate the major and minor version numbers (e.g. '5', and '7'):
        # the first '<digits>.<digits>.' in the string. At least one 
        # digit is required in each position; otherwise a bare '..' 
        # would match with empty groups, and int('') below would raise 
        # ValueError instead of this method returning (None,None):
        pat = re.compile(r'([0-9]+)[.]([0-9]+)[.]')
        match_obj = pat.search(version_str)
        if match_obj is None:
            return (None,None)
        (major, minor) = match_obj.groups()
        return (int(major), int(minor))
      
        
#         self.mysqldb.dropTable('unittest')
#         self.mysqldb.createTable('unittest', schema)
#         colNames = ['col1', 'col2']
#         colValues = [(10, 'col1'), (20, 'col2'), (30, 'col3')]
#         warnings = self.mysqldb.bulkInsert('unittest', colNames, colValues)
#         self.assertIsNone(warnings)
#         return 3

    #-------------------------
    # convert_to_string
    #--------------
    
    def convert_to_string(self, strLike):
        '''
        The str/byte/unicode type mess between
        Python 2.7 and 3.x. We want as 'normal'
        a string as possible, i.e. the type called 
        'str' in the running Python version:
        
           - Python 2.7: unicode strings are encoded to 
             UTF-8 str; str values are returned as is.
           - Python 3: byte strings are decoded from 
             UTF-8 to str; str values are returned as is.
        
        Values of any other type are returned unchanged.
        
        @param strLike: a Python 3 str (i.e. unicode string), a Python 3 binary str.
            a Python 2.7 unicode string, or a Python 2.7 str.
        @type strLike: {str|unicode|byte}
        @return: the given value as a str where conversion applies
        @rtype: str
        '''
        # Function-scope import, so that this method does not
        # depend on the file's import block:
        import sys
        
        if sys.version_info[0] < 3:
            # Python 2.7: type(u'') is the unicode type. Must not
            # fall through to the bytes check below: in 2.7 bytes is
            # str, so that check would decode the result right back
            # to unicode.
            if isinstance(strLike, type(u'')):
                # Python 2.7 unicode --> str:
                return strLike.encode('UTF-8')
            return strLike
        
        if isinstance(strLike, bytes):
            # Python 3 byte string:
            return strLike.decode('UTF-8')
        
        return strLike

    #-------------------------
    # read_config_file_content
    #--------------

    @classmethod
    def read_config_file_content(cls):
        '''
        Read and return content of pymysql_utils.cnf.py.
        The file is looked up in the directory of this 
        source file.
        
        @return: entire content of the config file
        @rtype: str
        '''
        config_file_name = os.path.join(os.path.dirname(__file__), 
                                        'pymysql_utils.cnf.py')
        with open(config_file_name, 'r') as config_fd:
            config_content = config_fd.read()
        return config_content
    
    #-------------------------
    # write_config_file_content 
    #--------------
    
    @classmethod
    def write_config_file_content(cls, content):
        '''
        Replace the content of pymysql_utils.cnf.py with
        the given text. The file is looked up in the 
        directory of this source file.
        
        @param content: new content of the config file
        @type content: str
        @return: whatever the file object's write() method returns
        '''
        config_file_name = os.path.join(os.path.dirname(__file__), 
                                        'pymysql_utils.cnf.py')
        with open(config_file_name, 'w') as config_fd:
            write_result = config_fd.write(content)
        return write_result
class TestAddAnonToActivityGrade(unittest.TestCase):
    '''
    Exercises AnonAndModIDAdder against small StudentmoduleExcerpt,
    ActivityGrade, and UserGrade tables that setUp() builds in the
    'unittest' MySQL database.
    '''

    # The schemas are built from lists of pairs rather than from dict
    # literals: a dict literal has already lost its key order on
    # interpreters without ordered dicts, so wrapping it in an
    # OrderedDict would not guarantee the column order.
    studentmoduleExcerptSchema = OrderedDict([
        ('activity_grade_id', 'INT'),
        ('student_id', 'INT'),
        ('course_display_name', 'VARCHAR(255)'),
        ('grade', 'VARCHAR(5)'),
        ('max_grade', 'DOUBLE'),
        ('percent_grade', 'DOUBLE'),
        ('parts_correctness', 'VARCHAR(255)'),
        ('answers', 'VARCHAR(255)'),
        ('num_attempts', 'INT'),
        ('first_submit', 'DATETIME'),
        ('last_submit', 'DATETIME'),
        ('module_type', 'VARCHAR(255)'),
        ('anon_screen_name', 'VARCHAR(40)'),
        ('resource_display_name', 'VARCHAR(255)'),
        ('module_id', 'VARCHAR(255)'),
    ])

    # Column names in the same order as the schema above:
    studentmoduleExcerptColNames = [
        'activity_grade_id', 'student_id', 'course_display_name', 'grade',
        'max_grade', 'percent_grade', 'parts_correctness', 'answers',
        'num_attempts', 'first_submit', 'last_submit', 'module_type',
        'anon_screen_name', 'resource_display_name', 'module_id'
    ]

    userGradeExcerptSchema = OrderedDict([
        ('name', 'varchar(255)'),
        ('screen_name', 'varchar(255)'),
        ('grade', 'int'),
        ('course_id', 'varchar(255)'),
        ('distinction', 'tinyint'),
        ('status', 'varchar(50)'),
        ('user_int_id', 'int'),
        ('anon_screen_name', 'varchar(40)'),
    ])

    # Column names in the same order as the schema above:
    userGradeExcerptColNames = [
        'name', 'screen_name', 'grade', 'course_id', 'distinction', 'status',
        'user_int_id', 'anon_screen_name'
    ]

    # JSON 'state' strings that are inserted into the parts_correctness
    # column of the three StudentmoduleExcerpt rows:
    state1 = ' {"correct_map": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {"hint": "", "hintmode": null, "correctness": "correct", "npoints": null, "msg": "", "queuestate": null}}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}, "attempts": 1, "seed": 1, "done": true, "student_answers": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": "choice_1"}} '
    state2 = '{"correct_map": {}, "seed": 1, "student_answers": {}, "input_state": {"i4x-Medicine-HRP258-problem-0c6cf38317be42e0829d10cc68e7451b_2_1": {}}}'
    state3 = '{"position": 1}'

    modid1 = 'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'
    modid2 = 'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'
    modid3 = 'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'

    # One list per row; values are in studentmoduleExcerptColNames order:
    studentmoduleExcerptValues = \
                [
                [0,1,'myCourse',3,10,-1.0,state1,'',-1,'2014-01-10 04:10:45','2014-02-10 10:14:40','modtype1','abc','Guided Walkthrough',modid1],
                [1,2,'myCourse',5,10,-1.0,state2,'',-1,'2014-01-10 11:30:23','2014-02-10 14:30:12','modtype2','def','Evaluation',modid2],
                [2,3,'yourCourse',8,10,-1.0,state3,'',-1,'2014-01-10 18:34:12','2014-02-10 19:10:33','modtype2','ghi','Introduction',modid3]
               ]

    # One list per row; values are in userGradeExcerptColNames order.
    # Only user_int_id 1 and 2 are present; student 3 has no entry:
    userGradeExcerptValues = \
                [
                ['John Doe','myScreenName',0,'engineering/myCourse/summer2014',0,'notpassing',1,'abc'],
                ['Jane Silver','herScreenName',100,'engineering/myCourse/summer2014',1,'passing',2,'def']
               ]

    def setUp(self):
        '''
        (Re)create and fill the StudentmoduleExcerpt and UserGrade
        tables, make sure ActivityGrade exists and is empty, and
        close the connection again.
        '''
        # Comma-separated column list for use in SELECT statements:
        self.allColNames = ','.join(
            TestAddAnonToActivityGrade.studentmoduleExcerptColNames)

        self.db = MySQLDB(user='******', passwd='', db='unittest')
        self.db.dropTable('StudentmoduleExcerpt')
        self.db.createTable(
            'StudentmoduleExcerpt',
            TestAddAnonToActivityGrade.studentmoduleExcerptSchema,
            temporary=False)
        #***temporary=True)
        self.db.bulkInsert(
            'StudentmoduleExcerpt',
            TestAddAnonToActivityGrade.studentmoduleExcerptColNames,
            TestAddAnonToActivityGrade.studentmoduleExcerptValues)

        self.db.createTable(
            'ActivityGrade',
            TestAddAnonToActivityGrade.studentmoduleExcerptSchema)
        # Make sure there isn't left over content (if the table existed):
        self.db.truncateTable('ActivityGrade')

        # Rudimentary UserGrade table:
        self.db.dropTable('UserGrade')
        self.db.createTable('UserGrade',
                            TestAddAnonToActivityGrade.userGradeExcerptSchema,
                            temporary=False)
        self.db.bulkInsert('UserGrade',
                           TestAddAnonToActivityGrade.userGradeExcerptColNames,
                           TestAddAnonToActivityGrade.userGradeExcerptValues)

        self.db.close()

    def tearDown(self):
        '''
        Open and immediately close a connection. The test tables
        are intentionally left in place.
        '''
        self.db = MySQLDB(user='******', passwd='', db='unittest')
        # Can't drop tables: hangs
        #self.db.dropTable('StudentmoduleExcerpt')
        #self.db.dropTable('ActivityGrade')
        self.db.close()

    def testAddAnonToActivityTable(self):
        '''
        Run AnonAndModIDAdder in testing mode, then compare the first
        three rows of ActivityGrade against the expected tuples.
        '''
        # Expected ActivityGrade rows, indexed by the position in which
        # the query returns them:
        expectedRows = [
            (
                0, 1, 'myCourse', '3', 10.0, 30.0, '', '', -1,
                datetime.datetime(2014, 1, 10, 4, 10, 45),
                datetime.datetime(2014, 2, 10, 10, 14, 40), 'modtype1',
                'abc', 'Guided Walkthrough',
                'i4x://Carnegie/2013/chapter/1fee4bc0d5384cb4aa7a0d65f3ac5d9b'
            ),
            (
                1, 2, 'myCourse', '5', 10.0, 50.0, '', '', -1,
                datetime.datetime(2014, 1, 10, 11, 30, 23),
                datetime.datetime(2014, 2, 10, 14, 30, 12), 'modtype2',
                'def', 'Evaluation',
                'i4x://Carnegie/2013/chapter/5d08d2bae3ac4047bf5abe1d8dd16ac3'
            ),
            (
                2, 3, 'yourCourse', '8', 10.0, 80.0, '', '', -1,
                datetime.datetime(2014, 1, 10, 18, 34, 12),
                datetime.datetime(2014, 2, 10, 19, 10, 33), 'modtype2',
                'None', 'Introduction',
                'i4x://Carnegie/2013/chapter/9a9455cd30bd4c14819542bcd11bfcf8'
            ),
        ]
        try:
            # Modify the fake courseware_studentmodule excerpt
            # to add anon_screen_name, compute plusses/minuses,
            # compute grade percentage, etc:
            AnonAndModIDAdder('unittest', '', db='unittest', testing=True)
            self.db = MySQLDB(user='******', passwd='', db='unittest')
            rowQuery = 'SELECT %s FROM ActivityGrade;' % self.allColNames
            for rowNum, row in enumerate(self.db.query(rowQuery)):
                #print(row)
                # Rows beyond the expected ones are read, but not checked:
                if rowNum < len(expectedRows):
                    self.assertEqual(expectedRows[rowNum], row)
        finally:
            self.db.close()

    def testCacheIdInt2Anon(self):
        '''
        Check the int2AnonCache attribute that AnonAndModIDAdder
        exposes after construction in testing mode: students 1 and 2
        map to the anon names in UserGrade, student 3 maps to None.
        '''
        try:
            infoAdder = AnonAndModIDAdder('unittest',
                                          '',
                                          db='unittest',
                                          testing=True)
            expectedCache = {1: 'abc', 2: 'def', 3: None}
            self.assertEqual(expectedCache, infoAdder.int2AnonCache)
        finally:
            # NOTE(review): self.db here is the connection that setUp()
            # already closed -- confirm that closing twice is harmless.
            self.db.close()
Example #10
0
class SankeyMajors(object):
    '''
    Reads transitions between majors from MySQL and plots them
    as a Sankey diagram. Instantiating the class runs the queries
    and creates the plot.
    '''

    # Minimum number of students to have made the transitions
    # between two particular majors to be included in the
    # diagram. For undergrad majors only:
    #
    #    > 5: 505 pairs
    #    >10: 249 pairs
    #    >20: 101 pairs
    #    >50:  41 pairs
    #    >99:   8 pairs

    MIN_MAJOR_TRANSITIONS = 10

    #--------------------------
    # __init__
    #-------------------

    def __init__(self, majors_table='sankey'):
        '''
        Connect to MySQL as the current OS user, retrieve nodes
        and links, and plot the Sankey diagram.

        @param majors_table: value passed to MySQLDB as its 'db' argument
        @type majors_table: str
        '''

        self.majors_table = majors_table
        self.mysql_passwd = self.getMySQLPasswd()
        self.mysql_dbhost = 'localhost'
        # MySQL user is the OS user that started this process:
        self.mysql_user = getpass.getuser()
        self.mydb = MySQLDB(user=self.mysql_user,
                            passwd=self.mysql_passwd,
                            db=self.majors_table)

        (nodes, links) = self.get_nodes_and_links()
        SankeyDiagram.plot_sankey(nodes,
                                  links,
                                  plot_title="Majors Transitions")

    #--------------------------
    # get_nodes_and_links
    #-------------------

    def get_nodes_and_links(self):
        '''
        Query table majors_transitions for all pairs of majors with
        more than MIN_MAJOR_TRANSITIONS transitions. Build one
        SankeyLink per pair, and obtain the SankeyNode list that
        the links refer to.

        On return the src_node_num/target_node_num of each link are
        indexes into the returned node list, not the absolute major
        numbers of the database.

        @return: 2-tuple (nodes, links)
        @rtype: (list, list)
        @raise IndexError: if a link refers to a major number for
            which get_nodes() returned no node.
        '''

        query = '''SELECT major_left_num, major_right_num, count(*) AS num_transitions
                     FROM majors_transitions
                     GROUP BY major_left_num, major_right_num
                     HAVING num_transitions > %s
                ''' % SankeyMajors.MIN_MAJOR_TRANSITIONS

        MAJOR_LEFT_NUM = 0
        MAJOR_RIGHT_NUM = 1
        NUM_TRANSITIONS = 2

        link_info = self.mydb.query(query).nextall()

        # The int coercions below prevent nums to be treated
        # like LONG, and have subsequent displays of the numbers
        # have an 'L' suffix:

        links = [
            SankeyLink(int(one_link_info[MAJOR_LEFT_NUM]),
                       int(one_link_info[MAJOR_RIGHT_NUM]),
                       int(one_link_info[NUM_TRANSITIONS]))
            for one_link_info in link_info
        ]

        nodes = self.get_nodes(links)

        # Now we need to change the source and target node numbers
        # in the SankeyLink objects to be indexes into the just
        # obtained list of SankeyNodes instances. Right now those
        # source/target numbers are the absolute majors numbers
        # in the db. One pass over the nodes builds the lookup table;
        # setdefault() keeps the first position should a number
        # occur more than once:

        index_by_major_num = {}
        for node_index, sankey_node_obj in enumerate(nodes):
            index_by_major_num.setdefault(sankey_node_obj.num, node_index)

        for sankey_link_obj in links:
            sankey_link_obj.src_node_num = self._node_index(
                index_by_major_num, sankey_link_obj.src_node_num)
            sankey_link_obj.target_node_num = self._node_index(
                index_by_major_num, sankey_link_obj.target_node_num)

        return (nodes, links)

    #--------------------------
    # _node_index
    #-------------------

    @staticmethod
    def _node_index(index_by_major_num, major_num):
        '''
        Return the node list index recorded for the given major number.

        @param index_by_major_num: map from major number to list index
        @type index_by_major_num: {int : int}
        @param major_num: absolute major number to look up
        @type major_num: int
        @return: index into the node list
        @rtype: int
        @raise IndexError: if the major number has no node. (IndexError
            rather than KeyError, because that is what the earlier
            list-based lookup raised.)
        '''
        try:
            return index_by_major_num[major_num]
        except KeyError:
            raise IndexError("No node found for major number %s" % major_num)

    #--------------------------
    # _sql_in_list
    #-------------------

    @staticmethod
    def _sql_in_list(node_nums):
        '''
        Render numbers as a parenthesized, comma-separated list for
        use after SQL's IN operator, e.g. '(1, 2, 3)'.

        Simply calling str() on a tuple would yield '(5,)' for a
        single number, which is not valid SQL. The int() coercion
        ensures that only digits end up in the query.

        @param node_nums: non-empty collection of major numbers
        @type node_nums: iterable of int
        @return: string such as '(1, 2, 3)'
        @rtype: str
        '''
        return '(%s)' % ', '.join(
            str(int(node_num)) for node_num in sorted(node_nums))

    #--------------------------
    # get_nodes
    #-------------------

    def get_nodes(self, sankey_link_obj_list):
        '''
        Given SankeyLink objects, look up in table major_nums the
        name of every major that occurs as source or target of a
        link, and return one SankeyNode per row found. Each node is
        given the next value from a ColorSource.

        @param sankey_link_obj_list: links whose src_node_num and
            target_node_num hold absolute major numbers
        @type sankey_link_obj_list: [SankeyLink]
        @return: list of SankeyNode instances; empty if no links
            were passed in.
        @rtype: [SankeyNode]
        '''

        # Each major is needed only once, even if it occurs in
        # several links:
        nodes_to_get = set()
        for link_obj in sankey_link_obj_list:
            nodes_to_get.add(link_obj.src_node_num)
            nodes_to_get.add(link_obj.target_node_num)

        # No links: nothing to look up. ('IN ()' would be
        # a SQL syntax error.)
        if not nodes_to_get:
            return []

        query = '''SELECT major_num, major
                     FROM major_nums
                    WHERE major_num IN %s;
                ''' % SankeyMajors._sql_in_list(nodes_to_get)
        # Get:
        #     [
        #       (1,AA-BS)
        #       (2,AES-BAS)
        #       (3,AES-BS)
        #     ]
        MAJOR_NUM = 0
        MAJOR_NAME = 1

        node_info = self.mydb.query(query).nextall()

        color_source = ColorSource(len(node_info))

        # The 'int' coercion is to avoid the suffix 'L'.
        # The 'str()' is to get rid of the unicode 'u' prefix:
        nodes = [
            SankeyNode(int(one_node_info[MAJOR_NUM]),
                       str(one_node_info[MAJOR_NAME]), color_source.next())
            for one_node_info in node_info
        ]

        return nodes

    #--------------------------
    # get MySQLPasswd
    #-------------------

    def getMySQLPasswd(self):
        '''
        Return the first line, stripped of surrounding whitespace,
        of file .ssh/mysql in the current user's home directory.

        @return: the password, or the empty string if the
            file cannot be read.
        @rtype: str
        '''
        homeDir = os.path.expanduser('~' + getpass.getuser())
        f_name = os.path.join(homeDir, '.ssh', 'mysql')
        try:
            with open(f_name, 'r') as f:
                return f.readline().strip()
        except IOError:
            # No readable password file: fall back to an empty password.
            return ''
Example #11
0
class TestForumEtl(unittest.TestCase):

    # Forum rows have the following columns:
    #  type, anonymous, anonymous_to_peers, at_position_list, user_int_id, body, course_display_name, created_at, votes, count, down_count, up_count, up, down, comment_thread_id, parent_id, parent_ids, sk   

    # Correct result for relationization of tinyForum.json
    # (in <projDir>/src/forum_etl/data). This result is anonymized and not relatable,
    # i.e. poster name UIDs use integers, while other tables use hashes:
    tinyForumGoldAnonymized = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('anon_screen_name_redacted','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]
    
    # Gold result for anonymization that allows relating to other tables (i.e. hashes are constant)
    tinyForumGoldRelatable = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('abc','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('def','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('ghi','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_abc> <nameRedac_abc> <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]
    
    # Gold result for non-anonymized forum:
    tinyForumGoldClear = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('otto_king','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('fritzL','Comment', 'False', 'False', '[]', 7L, ' Body with [email protected] email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster name Otto embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('bebeW','Comment', 'False', 'False', '[]', 10L, 'Body with 650-333-4567 a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name otto_king embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name Otto van Homberg embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]    

    def setUp(self):
        
        self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum")
        # Fill the little MongoDB with test JSON lines
        self.resetMongoTestDb()
        
        self.mysqldb = MySQLDB(user='******', db='unittest')
        # Start with an empty result MySQL table for each test:
        self.mysqldb.dropTable('contents')
        # Fill the fake UserGrade table with records of course participants:
        self.resetMySQLUserListDb()
        
        # Instantiate a Forum scrubber without the 
        # name of a bson file that contains forum
        # records. That 'None' for the bson file will
        # make the class understand that it's being
        # instantiated for a unit test. 
        self.forumScrubberAnonymized = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade')
        self.forumScrubberRelatable  = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', allowAnonScreenName=True)
        self.forumScrubberClear      = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', anonymize=False)

    def tearDown(self):
        self.mysqldb.close()

    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testAnonymized(self):
        self.forumScrubberAnonymized.populateUserCache()
        self.forumScrubberAnonymized.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldAnonymized[rowNum], forumPost)
            
    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testNonAnonymizedRelatable(self):
        self.forumScrubberRelatable.populateUserCache()
        self.forumScrubberRelatable.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldRelatable[rowNum], forumPost)

    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testNonAnonymized(self):
        self.forumScrubberClear.populateUserCache()
        self.forumScrubberClear.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldClear[rowNum], forumPost)


    
    def resetMongoTestDb(self):
        self.mongoDb.clearCollection()
        # Use small, known forum collection:
        currDir = os.path.dirname(__file__)     
        with open(os.path.join(currDir, 'data/tinyForum.json'), 'r') as jsonFd:
            for line in jsonFd:
                forumPost = json.loads(line)
                self.mongoDb.insert(forumPost)

    def resetMySQLUserListDb(self):
        '''
        Prepare a MySQL table that mimicks EdxPrivate.UserGrade.
        '''
        
        userGradeColSpecs = OrderedDict(
                                        {
                                         'name' : 'varchar(255)',
                                         'screen_name' : 'varchar(255)',
                                         'grade' : 'int',
                                         'course_id' : 'varchar(255)',
                                         'distinction' : 'tinyint',
                                         'status' : 'varchar(50)',
                                         'user_int_id' : 'int(11)',
                                         'anon_screen_name' : 'varchar(40)'
                                         })
        self.mysqldb.dropTable('UserGrade')
        self.mysqldb.createTable('UserGrade', userGradeColSpecs)
        self.mysqldb.bulkInsert('UserGrade', 
                                ('name','screen_name','grade','course_id','distinction','status','user_int_id','anon_screen_name'),
                                [
                                 ('Otto van Homberg','otto_king',5,'oldCourse',0,'notpassing',5,'abc'),
                                 ('Andreas Fritz','fritzL',2,'newCourse',0,'notpassing',7,'def'),
                                 ('Bebe Winter', 'bebeW',10,'History of Baking',1,'passing',10,'ghi')
                                 ])
Example #12
0
class TestForumEtl(unittest.TestCase):

    # Forum rows have the following columns:
    #  type, anonymous, anonymous_to_peers, at_position_list, user_int_id, body, course_display_name, created_at, votes, count, down_count, up_count, up, down, comment_thread_id, parent_id, parent_ids, sk   

    # Correct result for relationization of tinyForum.json
    # (in <projDir>/src/forum_etl/data). This result is anonymized and not relatable,
    # i.e. poster name UIDs use integers, while other tables use hashes:
    tinyForumGoldAnonymized = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('anon_screen_name_redacted','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('anon_screen_name_redacted','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> <nameRedac_anon_screen_name_redacted> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]
    
    # Gold result for anonymization that allows relating to other tables (i.e. hashes are constant)
    tinyForumGoldRelatable = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('abc','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('def','Comment', 'False', 'False', '[]', 7L, ' Body with <emailRedac> email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('ghi','Comment', 'False', 'False', '[]', 10L, 'Body with <phoneRedac> a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('abc','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name <nameRedac_abc> <nameRedac_abc> <nameRedac_abc> embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]
    
    # Gold result for non-anonymized forum:
    tinyForumGoldClear = \
    [
    # poster Otto van Homberg: body is clean to start with:
    ('otto_king','CommentThread', 'False', 'False', '[]', 5L, 'Harmless body', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 20), "{u'count': 10, u'point': -6, u'down_count': 8, u'up': [u'2', u'10'], u'down': [u'1', u'3', u'4', u'5', u'6', u'7', u'8', u'9'], u'up_count': 2}", 10L, 8L, 2L, "['2', '10']", "['1', '3', '4', '5', '6', '7', '8', '9']", None, None, None, None),
    # poster Andreas Fritz: body has someone's email:
    ('fritzL','Comment', 'False', 'False', '[]', 7L, ' Body with [email protected] email.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 10, u'point': -4, u'down_count': 7, u'up': [u'6', u'8', u'10'], u'down': [u'1', u'2', u'3', u'4', u'5', u'7', u'9'], u'up_count': 3}", 10L, 7L, 3L, "['6', '8', '10']", "['1', '2', '3', '4', '5', '7', '9']", '519461545924670200000001', None, '[]', '519461555924670200000006'),
    # poster Otto van Homberg: body has 'Otto':
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster name Otto embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),
    # poster Andreas Fritz: body has a phone number:
    ('bebeW','Comment', 'False', 'False', '[]', 10L, 'Body with 650-333-4567 a phone number.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461545924670200000005', "[u'519461545924670200000005']", '519461545924670200000005-519461555924670200000008'),
    # poster Otto van Homberg: body has his screen name (otto_king):
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name otto_king embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007'),    
    # poster Otto van Homberg: body has his full name (Otto van Homberg):
    ('otto_king','Comment', 'False', 'False', '[]', 5L, 'Body with poster screen name Otto van Homberg embedded.', 'MITx/6.002x/2012_Fall', datetime.datetime(2013, 5, 16, 4, 32, 21), "{u'count': 0, u'point': 0, u'down_count': 0, u'up': [], u'down': [], u'up_count': 0}", 0L, 0L, 0L, '[]', '[]', '519461545924670200000001', '519461555924670200000006', "[u'519461555924670200000006']", '519461555924670200000006-519461555924670200000007')    
    ]    

    def setUp(self):
        
        self.mongoDb = MongoDB(dbName="unittest", collection="tinyForum")
        # Fill the little MongoDB with test JSON lines
        self.resetMongoTestDb()
        
        self.mysqldb = MySQLDB(mySQLUser='******', db='unittest')
        # Start with an empty result MySQL table for each test:
        self.mysqldb.dropTable('contents')
        # Fill the fake UserGrade table with records of course participants:
        self.resetMySQLUserListDb()
        
        # Instantiate a Forum scrubber without the 
        # name of a bson file that contains forum
        # records. That 'None' for the bson file will
        # make the class understand that it's being
        # instantiated for a unit test. 
        self.forumScrubberAnonymized = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade')
        self.forumScrubberRelatable  = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', allowAnonScreenName=True)
        self.forumScrubberClear      = EdxForumScrubber(None, mysqlDbObj=self.mysqldb, forumTableName='contents', allUsersTableName='unittest.UserGrade', anonymize=False)

    def tearDown(self):
        self.mysqldb.close()

    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testAnonymized(self):
        self.forumScrubberAnonymized.populateUserCache()
        self.forumScrubberAnonymized.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldAnonymized[rowNum], forumPost)
            
    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testNonAnonymizedRelatable(self):
        self.forumScrubberRelatable.populateUserCache()
        self.forumScrubberRelatable.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldRelatable[rowNum], forumPost)

    @unittest.skipIf(not RUN_ALL_TESTS, 
                     'Uncomment this decoration if RUN_ALL_TESTS is False, and you want to run just this test.')    
    def testNonAnonymized(self):
        self.forumScrubberClear.populateUserCache()
        self.forumScrubberClear.forumMongoToRelational(self.mongoDb, self.mysqldb, 'contents')  
        for rowNum, forumPost in enumerate(self.mysqldb.query('SELECT * FROM unittest.contents')):
            # print(str(rowNum) + ':' + str(forumPost))
            self.assertEqual(TestForumEtl.tinyForumGoldClear[rowNum], forumPost)


    
    def resetMongoTestDb(self):
        self.mongoDb.clearCollection()
        # Use small, known forum collection:
        currDir = os.path.dirname(__file__)     
        with open(os.path.join(currDir, 'data/tinyForum.json'), 'r') as jsonFd:
            for line in jsonFd:
                forumPost = json.loads(line)
                self.mongoDb.insert(forumPost)

    def resetMySQLUserListDb(self):
        '''
        Prepare a MySQL table that mimicks EdxPrivate.UserGrade.
        '''
        
        userGradeColSpecs = OrderedDict(
                                        {
                                         'name' : 'varchar(255)',
                                         'screen_name' : 'varchar(255)',
                                         'grade' : 'int',
                                         'course_id' : 'varchar(255)',
                                         'distinction' : 'tinyint',
                                         'status' : 'varchar(50)',
                                         'user_int_id' : 'int(11)',
                                         'anon_screen_name' : 'varchar(40)'
                                         })
        self.mysqldb.dropTable('UserGrade')
        self.mysqldb.createTable('UserGrade', userGradeColSpecs)
        self.mysqldb.bulkInsert('UserGrade', 
                                ('name','screen_name','grade','course_id','distinction','status','user_int_id','anon_screen_name'),
                                [
                                 ('Otto van Homberg','otto_king',5,'oldCourse',0,'notpassing',5,'abc'),
                                 ('Andreas Fritz','fritzL',2,'newCourse',0,'notpassing',7,'def'),
                                 ('Bebe Winter', 'bebeW',10,'History of Baking',1,'passing',10,'ghi')
                                 ])
class CoursesGivenQuarter(object):
    '''
    Bus module that queries datastage for course information,
    given academic year, and quarter.
    '''
    
    # Port of the MySQL server on 127.0.0.1 that __init__ connects to:
    MYSQL_PORT_LOCAL = 5555
    
    # Default topic to listen on; responses are always published here:
    module_topic   = 'course_listing'

    def __init__(self, topic=None, user='******', passwd=''):
        '''
        Connect to MySQL, subscribe to the request topic, and
        then wait for messages forever. NOTE: because of that
        final loop this constructor does not return.

        :param topic: topic to listen on; None selects
            CoursesGivenQuarter.module_topic
        :type topic: {string | None}
        :param user: MySQL user
        :type user: string
        :param passwd: MySQL password
        :type passwd: string
        '''
        if topic is None:
            topic = CoursesGivenQuarter.module_topic
            
        self.mysqldb = MySQLDB(host='127.0.0.1', 
                               port=CoursesGivenQuarter.MYSQL_PORT_LOCAL, 
                               user=user, 
                               passwd=passwd, 
                               db='Edx')
        
        # The Bus expects a callable that takes topicName, 
        # msgText, and msgOffset. Wrap our callback method
        # so that it can be handed over as such a callable:
        
        self.requestDeliveryMethod = functools.partial(self.requestCoursesForQuarter)        
        
        # Create a BusAdapter instance:
        
        self.bus = BusAdapter()

        # Tell the bus that we are interested in the request topic,
        # and want callbacks to self.requestDeliveryMethod whenever
        # a message arrives:
        
        self.bus.subscribeToTopic(topic, self.requestDeliveryMethod)
        
        # All work happens in the callback; just keep waiting.
        # Wait on the topic we actually subscribed to, which 
        # is not module_topic when the caller passed a topic:
        
        while True:
            self.bus.waitForMessage(topic)

    def requestCoursesForQuarter(self, topicName, msgText, msgOffset):
        '''
        Callback for messages arriving on the subscribed topic.
        Messages whose type is not 'req' are ignored.
        The msgText should have the JSON format:
        
            {'id'      : 'abcd',
             'type'    : 'req',
             'content' : {'academic_year' : '2014',
                          'quarter'       : 'spring'},
             'time'    : '2015-05-27T18:12:22.706204'
            }           
        
        On success an HTML table of the matching courses is
        published to CoursesGivenQuarter.module_topic with message 
        type 'resp', and the id of the request. On error, an 
        error message string is published the same way; if the 
        request had no id, the id 'NULL' is used. Messages that 
        are not parseable JSON are only logged.
        
        :param topicName: name of topic to which the arriving msg belongs
        :type topicName: string
        :param msgText: text part of the message. JSON as specified above.
        :type msgText: string
        :param msgOffset: position of message in the topic's message history
        :type msgOffset: int
        '''
        try:
            # Import the message into a dict:
            msgDict = json.loads(msgText)
        except (ValueError, TypeError) as e:
            self.bus.logError('Received msg with invalid wrapping JSON: %s (%s)' % (str(msgText), repr(e)))
            return

        # Must have a message id. (TypeError: the JSON 
        # was legal, but not an object):
        try:
            reqId = msgDict['id']
        except (KeyError, TypeError):
            self.returnError('NULL', "Error: message id not provided in an incoming request.")
            self.bus.logError("Message id not provided in %s" % str(msgDict))
            return

        # Must have a type == 'req'; other types (e.g. our
        # own responses) are silently skipped:
        try:
            reqKey = msgDict['type']
        except KeyError:
            self.returnError(reqId, "Error: message type not provided in %s" % str(msgDict))
            self.bus.logError('Received msg without a type field: %s' % str(msgText))
            return
        if reqKey != 'req':
            return
        
        # Must have a content field:
        try:
            contentDict = msgDict['content']
        except KeyError:
            self.returnError(reqId, "Error: no content field provided in %s" % str(msgDict))
            self.bus.logError('Received msg without a content field: %s' % str(msgText))
            return
        
        # Must have an academic year:
        try:
            academicYear = contentDict['academic_year']
        except (KeyError, TypeError):
            self.returnError(reqId, "Error: academic year not provided in %s" % str(msgDict))
            self.bus.logError('Received msg without academic year in content field: %s' % str(msgText))            
            return
            
        # Must have a quarter:
        try:
            quarter = contentDict['quarter']
        except (KeyError, TypeError):
            self.returnError(reqId, "Error: quarter not provided in %s" % str(msgDict))
            self.bus.logError('Received msg without quarter in content field: %s' % str(msgText))            
            return
        
        # Get an array of tuples, each tuple being one MySQL record:
        #    course_display_name,
        #    course_catalog_name,
        #    is_internal
        
        resultArr = self.executeCourseInfoQuery(academicYear, quarter, reqId=reqId)
        if resultArr is None:
            # Query failed; executeCourseInfoQuery() already
            # reported the error:
            return
        
        # Turn result into an HTML table:
        htmlRes = self.buildHtmlTableFromQueryResult(resultArr)

        # Note that we pass the message type 'resp' 
        # to publish(), and that we specify that the
        # msg ID is to be the same as the incoming request.

        self.bus.publish(htmlRes, 
                         CoursesGivenQuarter.module_topic,
                         msgType='resp',
                         msgId=reqId)
        
    def executeCourseInfoQuery(self, academicYear, quarter, reqId='NULL'):
        '''
        Query table CourseInfo for the courses of the given
        academic year and quarter.

        :param academicYear: academic year to match
        :type academicYear: {string | int}
        :param quarter: quarter to match
        :type quarter: string
        :param reqId: id under which an error response is published
        :type reqId: string
        :return: list of (course_display_name, course_catalog_name, is_internal)
            rows, or None if the query could not be run. In the latter
            case an error response was published, and the error logged.
        :rtype: {[tuple] | None}
        '''
        import re

        # Both values arrive in bus messages and are spliced into
        # the SQL text below. Only accept characters that cannot
        # terminate or escape the quoted literal:
        safeSqlValue = re.compile(r'[A-Za-z0-9_ ./-]*\Z')
        for reqValue in (academicYear, quarter):
            if safeSqlValue.match('%s' % (reqValue,)) is None:
                self.returnError(reqId, "Error: illegal character in academic year or quarter: '%s'" % repr(reqValue))
                self.bus.logError("Refused to query with unsafe value: '%s'" % repr(reqValue))
                return None
        
        courseQuery = ("SELECT course_display_name, course_catalog_name, is_internal "
                       "FROM CourseInfo "
                       "WHERE academic_year = '%s' AND quarter = '%s';" % (academicYear, quarter))

        try:
            # Pull all rows inside the try, so that errors 
            # raised while fetching are reported as well:
            return list(self.mysqldb.query(courseQuery))
        except Exception as e:
            self.returnError(reqId, "Error: Call to database returned an error: '%s'" % repr(e))
            self.bus.logError("Call to MySQL returned an error: '%s'" % repr(e))
            return None
    
    def returnError(self, req_id, errMsg):
        '''
        Publish errMsg to CoursesGivenQuarter.module_topic as a 
        message of type 'resp'.

        :param req_id: id of the request that is being answered
        :type req_id: string
        :param errMsg: error text to send
        :type errMsg: string
        '''
        self.bus.publish(errMsg, 
                         CoursesGivenQuarter.module_topic,
                         msgId=req_id, 
                         msgType='resp')

    def buildHtmlTableFromQueryResult(self, resTupleArr):
        '''
        Render query result rows as an HTML table with one header 
        row, and one row per course. Cell values are HTML-escaped.

        :param resTupleArr: (courseDisplayName, courseCatalogName, isInternal) rows
        :type resTupleArr: [tuple]
        :return: the HTML table
        :rtype: string
        '''
        from xml.sax.saxutils import escape

        htmlStr   = '<table border=1><tr><td><b>Course</b></td><td><b>Description</b></td><td><b>Internal-Only</b></td></tr>'
        strResArr = []
        for (courseDisplayName, courseCatalogName, isInternal) in resTupleArr:
            # '%s' first turns non-string values (e.g. the is_internal 
            # flag) into text; escape() then protects '&', '<', and '>':
            cells = tuple(escape('%s' % (cellVal,)) 
                          for cellVal in (courseDisplayName, courseCatalogName, isInternal))
            strResArr.append("<tr><td>%s</td><td>%s</td><td>%s</td></tr>" % cells)
        return htmlStr + ' '.join(strResArr) + '</table>'
            
    def close(self):
        '''
        Close the MySQL connection. Best effort: errors during
        closing are ignored.
        '''
        try:
            self.mysqldb.close()
        except Exception:
            # Deliberately ignored; unlike a bare 'except:' this 
            # still lets KeyboardInterrupt/SystemExit through.
            pass