コード例 #1
0
class UserCountryTableCreator(object):
    """Build the ``UserCountry`` table from the users found in ``EventXtract``.

    For every distinct (anon_screen_name, ip_country) pair in ``EventXtract``
    the three-letter country code is translated, via ``IpCountryDict``, into
    a two-letter code, a three-letter code and a country name, and the
    result is stored as one row of ``UserCountry``.

    Typical use: construct, call ``fillTable()``, then ``makeIndex()``,
    and finally ``close()``.
    """

    DEST_TABLE = 'UserCountry'

    def __init__(self, user, pwd):
        """Connect to the ``Edx`` database and create an empty destination table.

        Any existing ``UserCountry`` table is dropped before the new one is
        created.

        :param user: MySQL user name passed to ``MySQLDB``.
        :param pwd: password of that MySQL user.
        """
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(UserCountryTableCreator.DEST_TABLE)
        # The schema is given as a list of pairs: seeding OrderedDict from a
        # dict literal only preserves whatever order that plain dict has,
        # which is arbitrary on interpreters whose dicts are unordered.
        self.db.createTable(
            UserCountryTableCreator.DEST_TABLE,
            OrderedDict([
                ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
                ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
                ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
                ('country', 'varchar(255) NOT NULL DEFAULT ""'),
            ]))

    def fillTable(self):
        """Translate every distinct user/country pair and bulk-insert the rows.

        Pairs whose country code cannot be translated (the lookup raises
        ``ValueError``, ``TypeError`` or ``KeyError``) are reported on
        stderr and left out of the table. All remaining rows are inserted
        with a single ``bulkInsert`` call.
        """
        query = "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"
        values = []
        for (user, ip3LetterCountry) in self.db.query(query):
            try:
                (twoLetterCode, threeLetterCode, country) = \
                    self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError, TypeError, KeyError) as e:
                # repr() instead of backticks: backtick-repr is Python 2
                # only syntax.
                sys.stderr.write(
                    "Could not look up one country from (%s/%s): %s\n" %
                    (user, ip3LetterCountry, repr(e)))
                continue
            # '%s' formatting coerces every field to a string.
            values.append(('%s' % user,
                           '%s' % twoLetterCode,
                           '%s' % threeLetterCode,
                           '%s' % country))

        colNameTuple = ('anon_screen_name', 'two_letter_country',
                        'three_letter_country', 'country')
        self.db.bulkInsert(UserCountryTableCreator.DEST_TABLE, colNameTuple,
                           values)

    def makeIndex(self):
        """Create the indexes on ``anon_screen_name`` and ``three_letter_country``.

        Delegates to the ``createIndexIfNotExists`` stored procedure, which
        must already be defined in the database. The trailing numeric
        argument is presumably the index prefix length -- TODO confirm
        against the procedure's definition.
        """
        table = UserCountryTableCreator.DEST_TABLE
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryAnonIdx', '%s', 'anon_screen_name', 40);" % table)
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryThreeLetIdx', '%s', 'three_letter_country', 3);" % table)

    def close(self):
        """Close the underlying database connection."""
        self.db.close()
コード例 #2
0
class UserCountryTableCreator(object):
    """Create and populate the ``UserCountry`` table of the ``Edx`` database.

    Each distinct (anon_screen_name, ip_country) pair in ``EventXtract``
    becomes one ``UserCountry`` row holding the user plus the two-letter
    code, three-letter code and country name that ``IpCountryDict`` returns
    for the pair's three-letter country code.
    """

    DEST_TABLE = 'UserCountry'

    # (column name, MySQL column definition) pairs in table order. Both the
    # CREATE TABLE schema and the insert column list are derived from this,
    # so the two cannot drift apart.
    _COLUMN_SPECS = (
        ('anon_screen_name', 'varchar(40) NOT NULL DEFAULT ""'),
        ('two_letter_country', 'varchar(2) NOT NULL DEFAULT ""'),
        ('three_letter_country', 'varchar(3) NOT NULL DEFAULT ""'),
        ('country', 'varchar(255) NOT NULL DEFAULT ""'),
    )

    def __init__(self, user, pwd):
        """Open the database connection and recreate the destination table.

        An existing ``UserCountry`` table is dropped, then an empty one is
        created.

        :param user: MySQL user name passed to ``MySQLDB``.
        :param pwd: password of that MySQL user.
        """
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        self.db.dropTable(self.DEST_TABLE)
        # OrderedDict is seeded from a sequence of pairs, not a dict literal:
        # a dict literal has already lost the written order on interpreters
        # whose plain dicts are unordered.
        self.db.createTable(self.DEST_TABLE,
                            OrderedDict(self._COLUMN_SPECS))

    def fillTable(self):
        """Look up the country of every distinct user and insert all rows.

        A pair whose lookup raises ``ValueError``, ``TypeError`` or
        ``KeyError`` is logged to stderr and skipped. The collected rows
        are written with one ``bulkInsert`` call.
        """
        values = []
        rows = self.db.query(
            "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract")
        for (user, ip3LetterCountry) in rows:
            try:
                (twoLetterCode, threeLetterCode, country) = \
                    self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
            except (ValueError, TypeError, KeyError) as e:
                # repr(e) replaces the Python-2-only backtick syntax.
                sys.stderr.write(
                    "Could not look up one country from (%s/%s): %s\n" %
                    (user, ip3LetterCountry, repr(e)))
                continue
            # Coerce each field to a string before handing it to the insert.
            values.append(tuple(
                '%s' % field
                for field in (user, twoLetterCode, threeLetterCode, country)))

        colNameTuple = tuple(name for (name, _) in self._COLUMN_SPECS)
        self.db.bulkInsert(self.DEST_TABLE, colNameTuple, values)

    def makeIndex(self):
        """Index ``UserCountry`` on user name and on three-letter country code.

        Uses the ``createIndexIfNotExists`` stored procedure, which has to
        exist in the database already. The last argument of each call is
        presumably an index prefix length -- TODO confirm.
        """
        for (indexName, column, length) in (
                ('UserCountryAnonIdx', 'anon_screen_name', 40),
                ('UserCountryThreeLetIdx', 'three_letter_country', 3)):
            self.db.execute(
                "CALL createIndexIfNotExists('%s', '%s', '%s', %s);" %
                (indexName, self.DEST_TABLE, column, length))

    def close(self):
        """Close the database connection."""
        self.db.close()
コード例 #3
0
class UserCountryTableCreator(object):
    """Create the ``UserCountry`` table and fill it in fixed-size batches.

    Every distinct (anon_screen_name, ip_country) pair of ``EventXtract``
    yields one ``UserCountry`` row with the two-letter code, three-letter
    code and country name that ``IpCountryDict`` returns for the pair's
    three-letter country code. Pairs that cannot be translated are kept,
    with placeholder country values.
    """

    DEST_TABLE = 'UserCountry'
    # Number of anon ids-country-2-letter-3-letter
    # tuples to accumulate before inserting into
    # UserCountry:
    INSERT_BULK_SIZE = 15000

    def __init__(self, user, pwd):
        """Connect to the ``Edx`` database and recreate an empty ``UserCountry``.

        Any existing table of that name is dropped first, so the table is
        always rebuilt from scratch.

        :param user: MySQL user name passed to ``MySQLDB``.
        :param pwd: password of that MySQL user.
        """
        self.ipCountryXlater = IpCountryDict()
        self.user = user
        self.pwd = pwd
        self.db = MySQLDB(user=self.user, passwd=self.pwd, db='Edx')
        createCmd = '''CREATE TABLE %s (
                         anon_screen_name varchar(40) NOT NULL DEFAULT "",
                         two_letter_country varchar(2) NOT NULL DEFAULT "",
                         three_letter_country varchar(3) NOT NULL DEFAULT "",
                         country varchar(255) NOT NULL DEFAULT ""
                         ) ENGINE=MyISAM;
                         ''' % self.DEST_TABLE
        self.db.dropTable(self.DEST_TABLE)
        print("Creating table UserCountry...")
        self.db.execute(createCmd)
        print("Done creating table UserCountry.")

    def _lookupCountry(self, ip3LetterCountry):
        """Return (two-letter code, three-letter code, country) for a code.

        Falls back to ('XX', 'XXX', 'Not in lookup tbl') when the lookup
        raises ``ValueError``, ``TypeError`` or ``KeyError``, so that the
        user still gets a row in the table.
        """
        try:
            (twoLetterCode, threeLetterCode, country) = \
                self.ipCountryXlater.getBy3LetterCode(ip3LetterCountry)
        except (ValueError, TypeError, KeyError):
            return ('XX', 'XXX', 'Not in lookup tbl')
        return (twoLetterCode, threeLetterCode, country)

    def fillTable(self):
        """Translate all distinct user/country pairs and insert them batchwise.

        Rows are read from the query result iterator and inserted in
        chunks of at most ``INSERT_BULK_SIZE`` rows. Progress is printed
        to stdout. If ``bulkInsert`` reports errors, they are printed and
        the process exits with status 1; warnings are only printed.
        """
        query = "SELECT DISTINCT anon_screen_name, ip_country FROM EventXtract"
        query_res_it = self.db.query(query)
        # Order of columns for insert:
        colNameTuple = ('anon_screen_name', 'two_letter_country',
                        'three_letter_country', 'country')
        bulkSize = self.INSERT_BULK_SIZE

        done = False
        while not done:
            values = []
            print("%s: Starting one set of %s lookups..." %
                  (str(datetime.datetime.today()), bulkSize))
            for _ in range(bulkSize):
                try:
                    # Builtin next() instead of the Python-2-only
                    # iterator.next() method.
                    (anon_screen_name, ip3LetterCountry) = next(query_res_it)
                except StopIteration:
                    done = True
                    break
                (twoLetterCode, threeLetterCode, country) = \
                    self._lookupCountry(ip3LetterCountry)
                # '%s' formatting coerces every field to a string.
                values.append(('%s' % anon_screen_name,
                               '%s' % twoLetterCode,
                               '%s' % threeLetterCode,
                               '%s' % country))

            if not values:
                # The result set was empty or ended exactly on a batch
                # boundary: there is nothing left to insert, so do not
                # send an empty batch to the database.
                break

            # Insert this chunk into the UserCountry table
            print("%s: Inserting %s rows into UserCountry table..." %
                  (str(datetime.datetime.today()), len(values)))
            (errors, warnings) = self.db.bulkInsert(self.DEST_TABLE,
                                                    colNameTuple, values)
            if errors is not None:
                print('Error(s) during UserCountry insert: %s' % errors)
                sys.exit(1)
            if warnings is not None:
                print('Warning(s) during UserCountry insert: %s' % warnings)

            print("%s: Done inserting %s rows into UserCountry table..." %
                  (str(datetime.datetime.today()), len(values)))
            # ... and loop to process the next INSERT_BULK_SIZE batch

    def makeIndex(self):
        """Index ``UserCountry`` on user name and on three-letter country code.

        Relies on the ``createIndexIfNotExists`` stored procedure being
        defined in the database. The last argument of each call is
        presumably an index prefix length -- TODO confirm.
        """
        table = self.DEST_TABLE
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryAnonIdx', '%s', 'anon_screen_name', 40);" % table)
        self.db.execute(
            "CALL createIndexIfNotExists('UserCountryThreeLetIdx', '%s', 'three_letter_country', 3);" % table)

    def close(self):
        """Close the database connection."""
        self.db.close()