def unify(cur):
    """
    Fold newly reduced tables into reduced_log.unified.

    Intended to be run after a new table has been reduced with reduce_log().
    Every table in reduced_log that is neither a bookkeeping table nor
    already the name of a partition of `unified` is processed in sorted name
    order: a partition for it is carved out of the catch-all `other`
    partition, then its rows are copied into `unified`.

    cur -- open database cursor; every statement is issued through it.
    """
    # Tables that live in reduced_log but are never merged into `unified`.
    skipped = set(['unified', 'users', 'servers', 'unified_users', 'unified_servers'])

    cur.execute("USE reduced_log")

    cur.execute('SHOW TABLES')
    existing_tables = set(name for (name,) in cur.fetchall())

    cur.execute(
        "SELECT PARTITION_NAME FROM INFORMATION_SCHEMA.PARTITIONS "
        "WHERE TABLE_SCHEMA = 'reduced_log' AND TABLE_NAME = 'unified'"
    )
    existing_partitions = set(name for (name,) in cur.fetchall())

    # A table counts as "already merged" when a partition carries its name.
    pending = sorted(
        name for name in existing_tables
        if name not in existing_partitions and name not in skipped
    )

    reorganize_sql = ("ALTER TABLE unified REORGANIZE PARTITION other INTO "
                      "({0}, PARTITION other VALUES LESS THAN MAXVALUE)")
    for name in pending:
        # Split the new partition off `other` first, then load the rows.
        print_and_execute(reorganize_sql.format(partition_from_str(name)), cur)
        print_and_execute("INSERT INTO unified SELECT * FROM {0}".format(name), cur)
def create_unified(cur):
    """
    Build reduced_log.unified from scratch, seeded from two log tables.

    Run this when reduced_log.unified does not exist yet, or to regenerate
    it after a schema change. Note that this function issues no DROP, so an
    existing `unified` table has to be removed by the caller beforehand.

    The first two log tables (in sorted name order) are combined with
    UNION ALL to create `unified`; indexes on userid, serverid and
    event_time are added; finally the table is range-partitioned on
    TO_DAYS(event_time) with one partition per seed table plus a catch-all
    `other` partition.

    Bookkeeping tables (`unified`, `users`, `servers`, `unified_users`,
    `unified_servers`) are never used as seeds; this is the same exclusion
    list unify() applies when looking for tables to merge.

    cur -- open database cursor; every statement is issued through it.

    Raises ValueError when reduced_log contains no log table to seed from.
    """
    # Same non-log tables that unify() refuses to merge.
    auxiliary = set(['unified', 'users', 'servers', 'unified_users', 'unified_servers'])

    cur.execute("USE reduced_log")
    cur.execute("SHOW TABLES")

    # Sort explicitly, as unify() does, instead of relying on server order.
    # NOTE(review): assumes table names sort in the order partition_from_str()
    # bounds increase, which RANGE partitioning requires -- confirm.
    candidates = sorted(name for (name,) in cur.fetchall() if name not in auxiliary)
    initial_tables = candidates[:2]
    if not initial_tables:
        # Without a seed table the CREATE statement would have no SELECT.
        raise ValueError("reduced_log has no log tables to build 'unified' from")

    selects = " UNION ALL ".join("SELECT * FROM {0}".format(t) for t in initial_tables)
    print_and_execute("CREATE TABLE unified {0}".format(selects), cur)

    for column in ("userid", "serverid", "event_time"):
        print_and_execute("ALTER TABLE unified ADD INDEX ({0})".format(column), cur)

    partitions = ", ".join(partition_from_str(t) for t in initial_tables)
    print_and_execute(
        "ALTER TABLE unified PARTITION BY RANGE( TO_DAYS(event_time) ) ( "
        + partitions
        + ", PARTITION other VALUES LESS THAN MAXVALUE"
        + ")",
        cur)