예제 #1
0
def verify_schema_for_migration(source_replica_set, destination_replica_set,
                                databases, confirm_row_counts):
    """ Confirm that source and destination have schema and row counts in sync

    Table existence and table definitions are compared on the masters of the
    two replica sets; row counts are compared on their slaves.

    Args:
    source_replica_set - The source replica set, as understood by
                         MysqlZookeeper.get_mysql_instance_from_replica_set
    destination_replica_set - The destination replica set, same format
    databases - An iterable of databases to check
    confirm_row_counts - If True, check that row counts are very close to
                         synchronized, otherwise do a very cursory check

    Raises:
    Exception - if at least one problem was found. The message is all of
                the problems joined with '. '
    """
    zk = host_utils.MysqlZookeeper()
    source_master = zk.get_mysql_instance_from_replica_set(source_replica_set)
    destination_master = zk.get_mysql_instance_from_replica_set(
        destination_replica_set)
    source_slave = zk.get_mysql_instance_from_replica_set(
        source_replica_set, host_utils.REPLICA_ROLE_SLAVE)
    destination_slave = zk.get_mysql_instance_from_replica_set(
        destination_replica_set, host_utils.REPLICA_ROLE_SLAVE)

    # Problems are collected rather than raised one at a time so that a
    # single run reports everything that is out of sync.
    problems = []
    for db in databases:
        source_tables = mysql_lib.get_tables(source_master, db)
        destination_tables = mysql_lib.get_tables(destination_master, db)

        differences = source_tables.symmetric_difference(destination_tables)
        if differences:
            problems.append('Found table existence mismatch in db {db}: {dif}'
                            ''.format(db=db, dif=differences))

        for table in source_tables:
            if table not in destination_tables:
                # The missing table is already part of the existence mismatch
                # recorded above. There is nothing on the destination to
                # compare against, so skip the definition and row count
                # checks for it.
                continue

            source_def = mysql_lib.show_create_table(source_master,
                                                     db,
                                                     table,
                                                     standardize=True)

            destination_def = mysql_lib.show_create_table(destination_master,
                                                          db,
                                                          table,
                                                          standardize=True)

            if source_def != destination_def:
                problems.append('Table definition mismatch db {db} '
                                'table {table}'
                                ''.format(db=db, table=table))

            # check_row_counts hands back a description of the problem, or
            # something falsy when the counts are acceptable.
            cnt_problem = check_row_counts(source_slave,
                                           destination_slave,
                                           db,
                                           table,
                                           exact=confirm_row_counts)
            if cnt_problem:
                problems.append(cnt_problem)

    if problems:
        raise Exception('. '.join(problems))

    log.info('Schema and data appear to be in *NSYNC')
예제 #2
0
def check_instance_table(hostaddr, table, desired_hash):
    """ Find the dbs on a MySQL instance where a table has an unexpected schema

    Args:
    hostaddr - object describing which mysql instance to connect to
    table - the name of the table to verify
    desired_hash - the md5sum of the desired CREATE TABLE for the table

    Returns:
    A dictionary keyed by the md5 hex digest of every CREATE TABLE statement
    that does not match desired_hash. Each value is a set of strings made of
    the string form of hostaddr, a space, and the db in which that schema
    was found. The dictionary is empty when every db matches.
    """
    mismatches = dict()
    conn = mysql_lib.connect_mysql(hostaddr)
    for db in mysql_lib.get_dbs(conn):
        create_stm = mysql_lib.show_create_table(conn, db, table)
        found_hash = hashlib.md5(create_stm).hexdigest()
        if found_hash == desired_hash:
            # This db has the expected schema, nothing to record
            continue

        location = ' '.join((hostaddr.__str__(), db))
        mismatches.setdefault(found_hash, set()).add(location)
    return mismatches
    def upload_schema(self, db, table, tmp_dir_db):
        """ Push the CREATE TABLE statement of a table to s3

        For replica types listed in SHARDED_DBS_PREFIX_MAP only the db that
        corresponds to the 'example_schema' shard is uploaded; for every
        other db the method returns without doing anything.

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
                     (not used by this method)
        """
        replica_type = self.instance.replica_type
        sharded_map = environment_specific.SHARDED_DBS_PREFIX_MAP
        if replica_type in sharded_map:
            example_db = environment_specific.convert_shard_to_db(
                sharded_map[replica_type]['example_schema'])
            if db != example_db:
                return
            path_template = PATH_DAILY_BACKUP_SHARDED_SCHEMA
        else:
            path_template = PATH_DAILY_BACKUP_NONSHARDED_SCHEMA

        destination = path_template.format(table=table,
                                           hostname_prefix=replica_type,
                                           date=self.datestamp,
                                           db_name=db)
        schema = mysql_lib.show_create_table(self.instance, db, table)

        worker = multiprocessing.current_process().name
        log.debug('{proc_id}: Uploading schema to {s3_path}'.format(
            s3_path=destination, proc_id=worker))

        s3 = boto.connect_s3()
        csv_bucket = s3.get_bucket(environment_specific.S3_CSV_BUCKET,
                                   validate=False)
        csv_bucket.new_key(destination).set_contents_from_string(schema)
예제 #4
0
    def upload_schema(self, db, table, tmp_dir_db):
        """ Store the schema of a table in s3

        When the instance's replica type is a key of SHARDED_DBS_PREFIX_MAP,
        the upload only happens for the db derived from that entry's
        'example_schema' shard. Any other db is skipped.

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
                     (not used by this method)
        """
        prefix_map = environment_specific.SHARDED_DBS_PREFIX_MAP
        instance_type = self.instance.replica_type

        if instance_type not in prefix_map:
            raw_path = PATH_DAILY_BACKUP_NONSHARDED_SCHEMA
        else:
            example_shard = prefix_map[instance_type]['example_schema']
            if environment_specific.convert_shard_to_db(example_shard) != db:
                return
            raw_path = PATH_DAILY_BACKUP_SHARDED_SCHEMA

        path_args = {'table': table,
                     'hostname_prefix': instance_type,
                     'date': self.datestamp,
                     'db_name': db}
        s3_key_name = raw_path.format(**path_args)

        definition = mysql_lib.show_create_table(self.instance, db, table)
        log.debug('{proc_id}: Uploading schema to {s3_path}'.format(
            proc_id=multiprocessing.current_process().name,
            s3_path=s3_key_name))

        bucket = boto.connect_s3().get_bucket(
            environment_specific.S3_CSV_BUCKET, validate=False)
        s3_key = bucket.new_key(s3_key_name)
        s3_key.set_contents_from_string(definition)
예제 #5
0
def main():
    """ Command line entry point for the MySQL schema verifier

    Fetches the CREATE TABLE statement of a table from a seed instance and
    db, hands its md5 to check_schema for the chosen instance type, and
    prints a diff against the seed definition for each differing schema
    that check_schema reports.

    Exits with status 0 when check_schema reports nothing and with status 1
    otherwise.
    """
    parser = argparse.ArgumentParser(description='MySQL schema verifier')
    parser.add_argument(
        'instance_type',
        help='Type of MySQL instance to verify',
        choices=environment_specific.SHARDED_DBS_PREFIX_MAP.keys())
    parser.add_argument(
        'table',
        help='Table to check',
    )
    parser.add_argument(
        'seed_instance',
        help=('Which host from which to fetch a table '
              ' definition. (format hostname[:port])'),
    )
    parser.add_argument('seed_db',
                        help=('Which db on --seed_instance from which to fetch'
                              ' a table definition. (ex pbdata012345)'))
    args = parser.parse_args()
    zk_prefix = environment_specific.SHARDED_DBS_PREFIX_MAP[
        args.instance_type]['zk_prefix']
    seed_instance = host_utils.HostAddr(args.seed_instance)
    desired = mysql_lib.show_create_table(seed_instance, args.seed_db,
                                          args.table)
    # NOTE(review): hashlib.md5 requires bytes on Python 3 - confirm what
    # show_create_table returns before running this on Python 3.
    tbl_hash = hashlib.md5(desired).hexdigest()

    # All printing uses the parenthesised single argument form: on Python 2
    # it is a print statement with identical output, on Python 3 it is a
    # regular call. The format() call has to sit inside the parentheses,
    # otherwise Python 3 would apply it to the None returned by print().
    print("Desired table definition:\n{desired}".format(desired=desired))
    incorrect = check_schema(zk_prefix, args.table, tbl_hash)
    if not incorrect:
        print("It appears that all schema is synced")
        sys.exit(0)

    d = difflib.Differ()
    # Only the values are needed: each one is the collection of
    # 'hostname:port db' strings that share one incorrect schema.
    for locations in incorrect.values():
        # Every location in the collection has the same schema, so any one
        # of them can be used to fetch the definition to diff.
        representative = next(iter(locations)).split(' ')
        hostaddr = host_utils.HostAddr(representative[0])
        create = mysql_lib.show_create_table(hostaddr, representative[1],
                                             args.table)
        diff = d.compare(desired.splitlines(), create.splitlines())
        print('The following difference has been found:')
        print('\n'.join(diff))
        print("It is present on the following db's:")
        print('\n'.join(locations))
    sys.exit(1)
예제 #6
0
def main():
    """ Command line entry point for the MySQL schema verifier

    Fetches the CREATE TABLE statement of a table from a seed instance and
    db, hands its md5 to check_schema for the chosen instance type, and
    prints a diff against the seed definition for each differing schema
    that check_schema reports.

    Exits with status 0 when check_schema reports nothing and with status 1
    otherwise.
    """
    parser = argparse.ArgumentParser(description='MySQL schema verifier')
    parser.add_argument('instance_type',
                        help='Type of MySQL instance to verify',
                        choices=('sharddb',
                                 'modsharddb'))
    parser.add_argument('table',
                        help='Table to check',)
    parser.add_argument('seed_instance',
                        help=('Which host from which to fetch a table '
                              ' definition. (format hostname[:port])'),)
    parser.add_argument('seed_db',
                        help=('Which db on --seed_instance from which to fetch'
                              ' a table definition. (ex pbdata012345)'))
    args = parser.parse_args()
    # argparse restricts instance_type to the two choices above, so one of
    # these branches always binds zk_prefix.
    if args.instance_type == 'sharddb':
        zk_prefix = SHARDDB_PREFIX
    elif args.instance_type == 'modsharddb':
        zk_prefix = MODSHARDDB_PREFIX
    seed_instance = host_utils.HostAddr(args.seed_instance)
    seed_conn = mysql_lib.connect_mysql(seed_instance)
    desired = mysql_lib.show_create_table(seed_conn, args.seed_db, args.table)
    # NOTE(review): hashlib.md5 requires bytes on Python 3 - confirm what
    # show_create_table returns before running this on Python 3.
    tbl_hash = hashlib.md5(desired).hexdigest()

    # All printing uses the parenthesised single argument form: on Python 2
    # it is a print statement with identical output, on Python 3 it is a
    # regular call. The format() call has to sit inside the parentheses,
    # otherwise Python 3 would apply it to the None returned by print().
    print("Desired table definition:\n{desired}".format(desired=desired))
    incorrect = check_schema(zk_prefix, args.table, tbl_hash)
    if not incorrect:
        print("It appears that all schema is synced")
        sys.exit(0)

    d = difflib.Differ()
    # Only the values are needed: each one is the collection of
    # 'hostname:port db' strings that share one incorrect schema.
    for locations in incorrect.values():
        # Every location in the collection has the same schema, so any one
        # of them can be used to fetch the definition to diff.
        representative = next(iter(locations)).split(' ')
        hostaddr = host_utils.HostAddr(representative[0])
        conn = mysql_lib.connect_mysql(hostaddr)
        create = mysql_lib.show_create_table(conn,
                                             representative[1],
                                             args.table)
        diff = d.compare(desired.splitlines(), create.splitlines())
        print('The following difference has been found:')
        print('\n'.join(diff))
        print("It is present on the following db's:")
        print('\n'.join(locations))
    sys.exit(1)
    def upload_schema(self, db, table, tmp_dir_db):
        """ Push the CREATE TABLE statement of a table to s3

        The destination key is the first of the three paths returned by
        backup.get_csv_backup_paths and it is written to self.upload_bucket.

        Args:
            db - the db to be backed up
            table - the table to be backed up
            tmp_dir_db - temporary storage used for all tables in the db
                         (not used by this method)
        """
        # Only the schema path is of interest here, the other two paths are
        # discarded.
        schema_path, _, _ = backup.get_csv_backup_paths(
            self.instance, db, table, self.datestamp)
        schema = mysql_lib.show_create_table(self.instance, db, table)

        worker = multiprocessing.current_process().name
        log.debug('{proc_id}: Uploading schema to {schema_path}'.format(
            schema_path=schema_path, proc_id=worker))

        s3 = boto.connect_s3()
        target_bucket = s3.get_bucket(self.upload_bucket, validate=False)
        target_bucket.new_key(schema_path).set_contents_from_string(schema)
예제 #8
0
    def upload_schema(self, db, table, tmp_dir_db):
        """ Store the schema of a table in s3

        The destination key is the first of the three paths returned by
        environment_specific.get_csv_backup_paths and it is written to
        self.upload_bucket.

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
                     (not used by this method)
        """
        path_args = (self.datestamp,
                     db,
                     table,
                     self.instance.replica_type,
                     self.instance.get_zk_replica_set()[0])
        # Only the schema path is of interest here, the other two paths are
        # discarded.
        schema_path, _, _ = environment_specific.get_csv_backup_paths(
            *path_args)

        definition = mysql_lib.show_create_table(self.instance, db, table)
        log.debug('{proc_id}: Uploading schema to {schema_path}'.format(
            proc_id=multiprocessing.current_process().name,
            schema_path=schema_path))

        bucket = boto.connect_s3().get_bucket(self.upload_bucket,
                                              validate=False)
        s3_key = bucket.new_key(schema_path)
        s3_key.set_contents_from_string(definition)