def verify_schema_for_migration(source_replica_set, destination_replica_set,
                                databases, confirm_row_counts):
    """ Confirm that source and destination have schema and row counts in sync

    Args:
    source_replica_set - Name of the replica set acting as the source
    destination_replica_set - Name of the replica set acting as the
                              destination
    databases - A set of databases to check
    confirm_row_counts - If True, check that row counts are very close to
                         synchronized, otherwise do a very cursory check

    Raises:
    Exception - a single exception describing every mismatch found
                (table existence, table definition, row counts)
    """
    zk = host_utils.MysqlZookeeper()
    source_master = zk.get_mysql_instance_from_replica_set(source_replica_set)
    destination_master = zk.get_mysql_instance_from_replica_set(
        destination_replica_set)
    # Row counts are compared on slaves rather than masters
    source_slave = zk.get_mysql_instance_from_replica_set(
        source_replica_set, host_utils.REPLICA_ROLE_SLAVE)
    destination_slave = zk.get_mysql_instance_from_replica_set(
        destination_replica_set, host_utils.REPLICA_ROLE_SLAVE)

    problems = list()
    for db in databases:
        source_tables = mysql_lib.get_tables(source_master, db)
        destination_tables = mysql_lib.get_tables(destination_master, db)
        differences = source_tables.symmetric_difference(destination_tables)
        if differences:
            problems.append('Found table existence mismatch in db {db}: {dif}'
                            ''.format(db=db, dif=differences))

        for table in source_tables:
            if table not in destination_tables:
                # BUG FIX: this was 'pass', which fell through and tried to
                # inspect a table that does not exist on the destination.
                # The mismatch is already recorded above, so skip it.
                continue
            source_def = mysql_lib.show_create_table(source_master,
                                                     db, table,
                                                     standardize=True)
            destination_def = mysql_lib.show_create_table(destination_master,
                                                          db, table,
                                                          standardize=True)
            if source_def != destination_def:
                problems.append('Table definition mismatch db {db} '
                                'table {table}'
                                ''.format(db=db, table=table))

            cnt_problem = check_row_counts(source_slave, destination_slave,
                                           db, table,
                                           exact=confirm_row_counts)
            if cnt_problem:
                problems.append(cnt_problem)

    if problems:
        raise Exception('. '.join(problems))
    log.info('Schema and data appear to be in *NSYNC')
def check_instance_table(hostaddr, table, desired_hash):
    """ Check that a table on a MySQL instance has the expected schema

    Args:
    hostaddr - object describing which mysql instance to connect to
    table - the name of the table to verify
    desired_hash - the md5sum of the desired CREATE TABLE for the table

    Returns:
    A dictionary with keys that are the hash of the CREATE TABLE statement
    and the values are sets of hostname:port followed by a space and then the
    db one which the incorrect schema was found.
    """
    ret = dict()
    conn = mysql_lib.connect_mysql(hostaddr)
    for db in mysql_lib.get_dbs(conn):
        definition = mysql_lib.show_create_table(conn, db, table)
        tbl_hash = hashlib.md5(definition).hexdigest()
        if tbl_hash != desired_hash:
            # setdefault replaces the manual "if key not in dict" dance;
            # str() formatting replaces the direct __str__() dunder call.
            ret.setdefault(tbl_hash, set()).add(
                '{addr} {db}'.format(addr=hostaddr, db=db))
    return ret
def upload_schema(self, db, table, tmp_dir_db):
    """ Upload the schema of a table to s3

    Args:
    db - the db to be backed up
    table - the table to be backed up
    tmp_dir_db - temporary storage used for all tables in the db
    """
    sharded_map = environment_specific.SHARDED_DBS_PREFIX_MAP
    replica_type = self.instance.replica_type
    if replica_type in sharded_map:
        # For sharded replica sets only the canonical example shard's
        # schema is uploaded; every other shard is skipped.
        example_schema = sharded_map[replica_type]['example_schema']
        if db != environment_specific.convert_shard_to_db(example_schema):
            return
        path_template = PATH_DAILY_BACKUP_SHARDED_SCHEMA
    else:
        path_template = PATH_DAILY_BACKUP_NONSHARDED_SCHEMA

    destination = path_template.format(table=table,
                                       hostname_prefix=replica_type,
                                       date=self.datestamp,
                                       db_name=db)
    ddl = mysql_lib.show_create_table(self.instance, db, table)
    log.debug('{proc_id}: Uploading schema to {s3_path}'
              ''.format(s3_path=destination,
                        proc_id=multiprocessing.current_process().name))
    s3_conn = boto.connect_s3()
    bucket = s3_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                validate=False)
    bucket.new_key(destination).set_contents_from_string(ddl)
def upload_schema(self, db, table, tmp_dir_db):
    """ Upload the schema of a table to s3

    Args:
    db - the db to be backed up
    table - the table to be backed up
    tmp_dir_db - temporary storage used for all tables in the db
    """
    prefix_map = environment_specific.SHARDED_DBS_PREFIX_MAP
    if self.instance.replica_type not in prefix_map:
        s3_path_raw = PATH_DAILY_BACKUP_NONSHARDED_SCHEMA
    else:
        # Sharded sets: only the example shard's schema gets uploaded.
        entry = prefix_map[self.instance.replica_type]
        canonical_db = environment_specific.convert_shard_to_db(
            entry['example_schema'])
        if db != canonical_db:
            return
        s3_path_raw = PATH_DAILY_BACKUP_SHARDED_SCHEMA

    s3_path = s3_path_raw.format(
        table=table,
        hostname_prefix=self.instance.replica_type,
        date=self.datestamp,
        db_name=db)
    create_stm = mysql_lib.show_create_table(self.instance, db, table)
    worker = multiprocessing.current_process().name
    log.debug('{proc_id}: Uploading schema to {s3_path}'
              ''.format(s3_path=s3_path, proc_id=worker))
    bucket = boto.connect_s3().get_bucket(
        environment_specific.S3_CSV_BUCKET, validate=False)
    key = bucket.new_key(s3_path)
    key.set_contents_from_string(create_stm)
def main(): parser = argparse.ArgumentParser(description='MySQL schema verifier') parser.add_argument( 'instance_type', help='Type of MySQL instance to verify', choices=environment_specific.SHARDED_DBS_PREFIX_MAP.keys()) parser.add_argument( 'table', help='Table to check', ) parser.add_argument( 'seed_instance', help=('Which host from which to fetch a table ' ' definition. (format hostname[:port])'), ) parser.add_argument('seed_db', help=('Which db on --seed_instance from which to fetch' ' a table definition. (ex pbdata012345)')) args = parser.parse_args() zk_prefix = environment_specific.SHARDED_DBS_PREFIX_MAP[ args.instance_type]['zk_prefix'] seed_instance = host_utils.HostAddr(args.seed_instance) desired = mysql_lib.show_create_table(seed_instance, args.seed_db, args.table) tbl_hash = hashlib.md5(desired).hexdigest() print("Desired table definition:\n{desired}").format(desired=desired) incorrect = check_schema(zk_prefix, args.table, tbl_hash) if len(incorrect) == 0: print "It appears that all schema is synced" sys.exit(0) d = difflib.Differ() for problem in incorrect.iteritems(): represenative = list(problem[1])[0].split(' ') hostaddr = host_utils.HostAddr(represenative[0]) create = mysql_lib.show_create_table(hostaddr, represenative[1], args.table) diff = d.compare(desired.splitlines(), create.splitlines()) print 'The following difference has been found:' print '\n'.join(diff) print "It is present on the following db's:" print '\n'.join(list(problem[1])) sys.exit(1)
def main(): parser = argparse.ArgumentParser(description='MySQL schema verifier') parser.add_argument('instance_type', help='Type of MySQL instance to verify', choices=('sharddb', 'modsharddb')) parser.add_argument('table', help='Table to check',) parser.add_argument('seed_instance', help=('Which host from which to fetch a table ' ' definition. (format hostname[:port])'),) parser.add_argument('seed_db', help=('Which db on --seed_instance from which to fetch' ' a table definition. (ex pbdata012345)')) args = parser.parse_args() if args.instance_type == 'sharddb': zk_prefix = SHARDDB_PREFIX elif args.instance_type == 'modsharddb': zk_prefix = MODSHARDDB_PREFIX seed_instance = host_utils.HostAddr(args.seed_instance) seed_conn = mysql_lib.connect_mysql(seed_instance) desired = mysql_lib.show_create_table(seed_conn, args.seed_db, args.table) tbl_hash = hashlib.md5(desired).hexdigest() print ("Desired table definition:\n{desired}").format(desired=desired) incorrect = check_schema(zk_prefix, args.table, tbl_hash) if len(incorrect) == 0: print "It appears that all schema is synced" sys.exit(0) d = difflib.Differ() for problem in incorrect.iteritems(): represenative = list(problem[1])[0].split(' ') hostaddr = host_utils.HostAddr(represenative[0]) conn = mysql_lib.connect_mysql(hostaddr) create = mysql_lib.show_create_table(conn, represenative[1], args.table) diff = d.compare(desired.splitlines(), create.splitlines()) print 'The following difference has been found:' print '\n'.join(diff) print "It is present on the following db's:" print '\n'.join(list(problem[1])) sys.exit(1)
def upload_schema(self, db, table, tmp_dir_db):
    """ Upload the schema of a table to s3

    Args:
    db - the db to be backed up
    table - the table to be backed up
    tmp_dir_db - temporary storage used for all tables in the db
    """
    # Only the schema path from the (schema, data, success) triple is needed
    schema_path = backup.get_csv_backup_paths(self.instance, db, table,
                                              self.datestamp)[0]
    ddl = mysql_lib.show_create_table(self.instance, db, table)
    log.debug('{proc_id}: Uploading schema to {schema_path}'
              ''.format(schema_path=schema_path,
                        proc_id=multiprocessing.current_process().name))
    s3_conn = boto.connect_s3()
    bucket = s3_conn.get_bucket(self.upload_bucket, validate=False)
    bucket.new_key(schema_path).set_contents_from_string(ddl)
def upload_schema(self, db, table, tmp_dir_db):
    """ Upload the schema of a table to s3

    Args:
    db - the db to be backed up
    table - the table to be backed up
    tmp_dir_db - temporary storage used for all tables in the db
    """
    # Only the schema path from the returned triple is used here
    paths = environment_specific.get_csv_backup_paths(
        self.datestamp, db, table, self.instance.replica_type,
        self.instance.get_zk_replica_set()[0])
    schema_path = paths[0]
    create_stm = mysql_lib.show_create_table(self.instance, db, table)
    worker = multiprocessing.current_process().name
    log.debug('{proc_id}: Uploading schema to {schema_path}'
              ''.format(schema_path=schema_path, proc_id=worker))
    bucket = boto.connect_s3().get_bucket(self.upload_bucket,
                                          validate=False)
    key = bucket.new_key(schema_path)
    key.set_contents_from_string(create_stm)