class UserCountryTableCreator(object):
    """Rebuild the UserCountry table from the EventXtract table.

    Each distinct (anon_screen_name, ip_country) pair found in EventXtract
    is translated into a two-letter code, a three-letter code, and a country
    name, and the result is written to the UserCountry table.
    """

    DEST_TABLE = 'UserCountry'

    def __init__(self, user, pwd):
        """Open the database handle and re-create an empty UserCountry table.

        Any existing UserCountry table is dropped first.

        :param user: user name handed to MySQLDB
        :param pwd: password handed to MySQLDB
        """
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        # Build the schema from a list of pairs rather than a dict literal:
        # a plain dict does not guarantee ordering on every interpreter,
        # which would make the column order of the new table arbitrary.
        schema = OrderedDict([
            ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
            ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
            ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
            ('country', 'varchar(255) NOT NULL DEFAULT ""'),
        ])
        self.db.createTable(UserCountryTableCreator.DEST_TABLE, schema)

    def fillTable(self):
        """Translate every distinct user/country pair and bulk-insert the rows.

        Pairs whose country code cannot be translated are reported on
        stderr and left out of the table.
        """
        values = []
        for (user, ip3LetterCountry) in self.db.query(
                "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"):
            try:
                (twoLetterCode, threeLetterCode, country) = \
                    self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError, TypeError, KeyError) as e:
                # repr() instead of backticks: backticks are not valid
                # syntax on Python 3.
                sys.stderr.write("Could not look up one country from (%s/%s): %s\n" %
                                 (user, ip3LetterCountry, repr(e)))
                continue
            values.append(('%s' % user,
                           '%s' % twoLetterCode,
                           '%s' % threeLetterCode,
                           '%s' % country))

        # Order of columns for the insert; must match the tuples above:
        colNameTuple = ('anon_screen_name',
                        'two_letter_country',
                        'three_letter_country',
                        'country')
        self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple, values)

    def makeIndex(self):
        """Index UserCountry on anon_screen_name and three_letter_country.

        Relies on the createIndexIfNotExists procedure being callable
        through the database connection.
        """
        self.db.execute("CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);")
        self.db.execute("CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);")

    def close(self):
        """Close the database handle."""
        self.db.close()
class UserCountryTableCreator(object):
    """Populate the UserCountry table with one row per user/country pair.

    The pairs come from the distinct (anon_screen_name, ip_country)
    combinations in EventXtract; each country code is expanded into a
    two-letter code, a three-letter code, and a country name.
    """

    DEST_TABLE = 'UserCountry'

    def __init__(self, user, pwd):
        """Set up the lookup table and database handle; reset UserCountry.

        The destination table is dropped and created afresh.

        :param user: user name handed to MySQLDB
        :param pwd: password handed to MySQLDB
        """
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        # An OrderedDict only preserves the order it is given. Feeding it
        # a dict literal hands it whatever order the dict happens to have,
        # so the columns are listed as explicit pairs instead.
        self.db.createTable(
            UserCountryTableCreator.DEST_TABLE,
            OrderedDict([
                ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
                ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
                ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
                ('country', 'varchar(255) NOT NULL DEFAULT ""'),
            ]))

    def fillTable(self):
        """Look up every distinct user/country pair and insert the results.

        A pair whose country code fails to translate is logged to stderr
        and skipped; all remaining rows go to the table in one bulk insert.
        """
        values = []
        for (user, ip3LetterCountry) in self.db.query(
                "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"):
            try:
                (twoLetterCode, threeLetterCode, country) = \
                    self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError, TypeError, KeyError) as e:
                # Backtick repr is Python-2-only syntax; repr() is the
                # portable spelling and yields the same text.
                sys.stderr.write(
                    "Could not look up one country from (%s/%s): %s\n" %
                    (user, ip3LetterCountry, repr(e)))
                continue
            values.append((
                '%s' % user,
                '%s' % twoLetterCode,
                '%s' % threeLetterCode,
                '%s' % country,
            ))

        # Column order must line up with the tuples appended above:
        colNameTuple = ('anon_screen_name', 'two_letter_country',
                        'three_letter_country', 'country')
        self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple,
                           values)

    def makeIndex(self):
        """Create indexes on anon_screen_name and three_letter_country.

        Both are created via the createIndexIfNotExists procedure, which
        must be callable through the database connection.
        """
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);"
        )
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);"
        )

    def close(self):
        """Close the database handle."""
        self.db.close()
class UserCountryTableCreator(object):
    """Rebuild the UserCountry table from EventXtract in bounded batches.

    Each distinct (anon_screen_name, ip_country) pair in EventXtract is
    translated into a two-letter code, a three-letter code, and a country
    name. Rows are inserted INSERT_BULK_SIZE at a time so that the full
    result set never has to be held in memory at once.
    """

    DEST_TABLE = 'UserCountry'

    # Number of (anon id, 2-letter, 3-letter, country) tuples
    # to accumulate before inserting them into UserCountry:
    INSERT_BULK_SIZE = 15000

    def __init__(self, user, pwd):
        """Open the database handle and re-create an empty UserCountry table.

        :param user: user name handed to MySQLDB
        :param pwd: password handed to MySQLDB
        """
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')

        # Always start from scratch: whatever UserCountry table exists
        # is dropped, and an empty one is created in its place.
        createCmd = '''CREATE TABLE UserCountry (
                         anon_screen_name varchar(40) NOT NULL DEFAULT "",
                         two_letter_country varchar(2) NOT NULL DEFAULT "",
                         three_letter_country varchar(3) NOT NULL DEFAULT "",
                         country varchar(255) NOT NULL DEFAULT ""
                         ) ENGINE=MyISAM;
                         '''
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        print("Creating table UserCountry...")
        self.db.execute(createCmd)
        print("Done creating table UserCountry.")

    def fillTable(self):
        """Translate all distinct user/country pairs and insert them in batches.

        Country codes that cannot be translated are stored with the
        placeholder values 'XX' / 'XXX' / 'Not in lookup tbl'.

        :raises SystemExit: with status 1 if a bulk insert reports errors
        """
        query = "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"
        # Order of columns for insert:
        colNameTuple = ('anon_screen_name',
                        'two_letter_country',
                        'three_letter_country',
                        'country')
        bulkSize = UserCountryTableCreator.INSERT_BULK_SIZE

        values = []
        # Plain iteration instead of calling .next() on the result: the
        # .next() method is the Python-2-only spelling of the protocol.
        for (anon_screen_name, ip3LetterCountry) in self.db.query(query):
            if not values:
                print("%s: Starting one set of %s lookups..." %
                      (str(datetime.datetime.today()), bulkSize))
            values.append(self._translate(anon_screen_name, ip3LetterCountry))
            if len(values) >= bulkSize:
                self._insertBatch(colNameTuple, values)
                values = []

        # Flush the final, partially filled batch. An empty batch (no rows
        # at all, or a row count that is an exact multiple of the batch
        # size) is never sent to the database.
        if values:
            self._insertBatch(colNameTuple, values)

    def _translate(self, anon_screen_name, ip3LetterCountry):
        """Return the UserCountry row tuple for one user/country-code pair."""
        try:
            (twoLetterCode, threeLetterCode, country) = \
                self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
        except (ValueError, TypeError, KeyError):
            # Keep the user in the table, marked as untranslatable:
            twoLetterCode = 'XX'
            threeLetterCode = 'XXX'
            country = 'Not in lookup tbl'
        return ('%s' % anon_screen_name,
                '%s' % twoLetterCode,
                '%s' % threeLetterCode,
                '%s' % country)

    def _insertBatch(self, colNameTuple, values):
        """Bulk-insert one batch of rows; exit the process on insert errors."""
        print("%s: Inserting %s rows into UserCountry table..." %
              (str(datetime.datetime.today()), len(values)))
        (errors, warnings) = self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE,
                                                colNameTuple,
                                                values)
        if errors is not None:
            print('Error(s) during UserCountry insert: %s' % errors)
            sys.exit(1)
        if warnings is not None:
            print('Warning(s) during UserCountry insert: %s' % warnings)
        print("%s: Done inserting %s rows into UserCountry table..." %
              (str(datetime.datetime.today()), len(values)))

    def makeIndex(self):
        """Index UserCountry on anon_screen_name and three_letter_country.

        Relies on the createIndexIfNotExists procedure being callable
        through the database connection.
        """
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryAnonIdx', 'UserCountry', 'anon_screen_name', 40);"
        )
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryThreeLetIdx', 'UserCountry', 'three_letter_country', 3);"
        )

    def close(self):
        """Close the database handle."""
        self.db.close()