def get_tables_to_backup(self, db):
        """ Determine which tables should be backed up in a db

        Returns:
        a set of table names
        """
        if self.force_table:
            if self.force_table not in mysql_lib.get_tables(self.instance, db, skip_views=True):
                raise Exception('Requested table {t} does not exist in db {d}'
                                ''.format(t=self.force_table, d=db))
            return set([self.force_table])
        else:
            return mysql_lib.get_tables(self.instance, db, skip_views=True)
Example #2
def get_db_size_from_log(instance, db):
    """ Get yesterdays db size for an instance

    Args:
    instance - A hostaddr object
    db - A database that exists on the instance

    Returns: size in MB
    """
    conn = mysql_lib.connect_mysql(instance, 'dbascript')
    cursor = conn.cursor()
    sql = ("SELECT SUM(size_mb) as 'mb', "
           "        COUNT(1) as 'table_count' "
           "FROM  {metadata_db}.{tbl} "
           "WHERE db = %(db)s "
           "    AND reported_at=CURDATE() - INTERVAL 1 DAY "
           "    AND hostname=%(hostname)s and port=%(port)s "
           "GROUP BY db;")
    params = {'hostname': instance.hostname, 'port': instance.port, 'db': db}
    cursor.execute(
        sql.format(metadata_db=mysql_lib.METADATA_DB, tbl=TABLE_SIZE_TBL),
        params)
    ret = cursor.fetchone()

    expected_tables = mysql_lib.get_tables(instance, db, skip_views=True)
    if ret['table_count'] != len(expected_tables):
        raise Exception('Size data appears to be missing for {db} on {inst}'
                        ''.format(db=db, inst=instance))
    return ret['mb']
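
A minimal usage sketch for get_db_size_from_log, assuming the host_utils and
mysql_lib modules from this codebase are importable and that a size row was
logged yesterday; the hostname and db name below are placeholders.

import host_utils

instance = host_utils.HostAddr('testdb-001:3306')   # placeholder host:port
size_mb = get_db_size_from_log(instance, 'important_db')
print 'important_db was {mb} MB yesterday'.format(mb=size_mb)
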
def verify_csv_schema_upload(shard_type, date, schema_host, schema_db,
                             schema_upload_path_raw):
    """ Confirm that schema files are uploaded

    Args:
    shard_type - In this case, a hostname or shard type (generally
                 one and the same)
    date - The date to search for
    schema_host - A host to examine to find which tables should exist
    schema_db - Which db to inspect on schema_host
    schema_upload_path_raw - A string that can be format'ed in order to create
                             an S3 key path

    Returns a tuple (status, tables): status is True if no problems were
    found, False otherwise; tables is the tables that were checked.
    """
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False)
    tables = mysql_lib.get_tables(schema_host,
                                  environment_specific.convert_shard_to_db(schema_db),
                                  skip_views=True)
    return_status = True
    for table in tables:
        path = schema_upload_path_raw.format(table=table,
                                             hostname_prefix=shard_type,
                                             date=date,
                                             db_name=schema_db)
        if not bucket.get_key(path):
            print 'Expected key {key} is missing'.format(key=path)
            return_status = False
    return return_status, tables
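
The schema_upload_path_raw argument is expected to be a str.format template
containing {table}, {hostname_prefix}, {date} and {db_name} placeholders; a
small illustration with a made-up template (not the project's real path
constant):

example_template = '{hostname_prefix}/{date}/{db_name}/{table}/schema.sql'
print example_template.format(table='users',
                              hostname_prefix='sharddb',
                              date='2016-01-01',
                              db_name='example_db')
# -> sharddb/2016-01-01/example_db/users/schema.sql
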
Example #4
def verify_csv_schema_upload(shard_type, date, schema_host, schema_db,
                             schema_upload_path_raw):
    """ Confirm that schema files are uploaded

    Args:
    shard_type - In this case, a hostname or shard type (generally
                 one and the same)
    date - The date to search for
    schema_host - A host to examine to find which tables should exist
    schema_db - Which db to inspect on schema_host
    schema_upload_path_raw - A string that can be format'ed in order to create
                             an S3 key path

    Returns a tuple (status, tables): status is True if no problems were
    found, False otherwise; tables is the tables that were checked.
    """
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    tables = mysql_lib.get_tables(
        schema_host,
        environment_specific.convert_shard_to_db(schema_db),
        skip_views=True)
    return_status = True
    for table in tables:
        path = schema_upload_path_raw.format(table=table,
                                             hostname_prefix=shard_type,
                                             date=date,
                                             db_name=schema_db)
        if not bucket.get_key(path):
            print 'Expected key {key} is missing'.format(key=path)
            return_status = False
    return return_status, tables
def verify_unsharded_csv_backup(shard_type, date, instance):
    """ Verify that a non-sharded db has been backed up to hive

    Args:
    shard_type - In this case, a hostname prefix
    date - The date to search for
    instance - The actual instance to inspect for backups being done

    Returns True for no problems found, False otherwise.
    """
    if (date == (datetime.datetime.utcnow().date() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")):
        if datetime.datetime.utcnow().time() < CSV_STARTUP:
            print 'Backup startup time has not yet passed'
            # For today's date, we don't check anything until CSV_STARTUP has passed.
            return True

        if datetime.datetime.utcnow().time() < CSV_COMPLETION_TIME:
            # For today's date, until after CSV_COMPLETION_TIME it is good enough
            # to check if backups are running. If they are running, everything
            # is ok. If they are not running, we will do all the normal checks.
            if csv_backups_running(instance):
                print 'Backup running on {i}'.format(i=instance)
                return True

    return_status = True
    for db in mysql_lib.get_dbs(instance):
        (success, _) = \
            verify_csv_schema_upload(shard_type, date, instance, db,
                                     mysql_backup_csv.PATH_DAILY_BACKUP_NONSHARDED_SCHEMA)
        if not success:
            return_status = False

    if not return_status:
        print 'missing schema file'
        # problem with schema, don't bother verifying data
        return return_status

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(instance):
        for table in mysql_lib.get_tables(instance, db, skip_views=True):
            key = mysql_backup_csv.PATH_DAILY_BACKUP.format(table=table,
                                                            hostname_prefix=shard_type,
                                                            date=date,
                                                            db_name=db)
            if not bucket.get_key(key):
                missing_uploads.add(key)

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print 'Missing uploads: {uploads}'.format(uploads=missing_uploads)
        else:
            print 'Missing {num} uploads'.format(num=len(missing_uploads))
        return False
    else:
        return True
def verify_unsharded_csv_backup(shard_type, date, instance):
    """ Verify that a non-sharded db has been backed up to hive

    Args:
    shard_type - In this case, a hostname prefix
    date - The date to search for
    instance - The actual instance to inspect for backups being done

    Returns True for no problems found, False otherwise.
    """
    if (date == (datetime.datetime.utcnow().date() -
                 datetime.timedelta(days=1)).strftime("%Y-%m-%d")):
        if datetime.datetime.utcnow().time() < CSV_STARTUP:
            print 'Backup startup time has not yet passed'
            # For today's date, we don't check anything until CSV_STARTUP has passed.
            return True

        if datetime.datetime.utcnow().time() < CSV_COMPLETION_TIME:
            # For today's date, until after CSV_COMPLETION_TIME it is good enough
            # to check if backups are running. If they are running, everything
            # is ok. If they are not running, we will do all the normal checks.
            if csv_backups_running(instance):
                print 'Backup running on {i}'.format(i=instance)
                return True

    return_status = True
    for db in mysql_lib.get_dbs(instance):
        (success, _) = \
            verify_csv_schema_upload(shard_type, date, instance, db,
                                     mysql_backup_csv.PATH_DAILY_BACKUP_NONSHARDED_SCHEMA)
        if not success:
            return_status = False

    if not return_status:
        print 'missing schema file'
        # problem with schema, don't bother verifying data
        return return_status

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(instance):
        for table in mysql_lib.get_tables(instance, db, skip_views=True):
            key = mysql_backup_csv.PATH_DAILY_BACKUP.format(
                table=table, hostname_prefix=shard_type, date=date, db_name=db)
            if not bucket.get_key(key):
                missing_uploads.add(key)

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print 'Missing uploads: {uploads}'.format(uploads=missing_uploads)
        else:
            print 'Missing {num} uploads'.format(num=len(missing_uploads))
        return False
    else:
        return True
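
A small sketch of the early-exit window check used at the top of the function
above; the CSV_STARTUP and CSV_COMPLETION_TIME values here are assumed for
illustration only, not the project's real constants.

import datetime

CSV_STARTUP = datetime.time(5, 0)           # assumed backup start time (UTC)
CSV_COMPLETION_TIME = datetime.time(16, 0)  # assumed expected completion (UTC)

yesterday = (datetime.datetime.utcnow().date() -
             datetime.timedelta(days=1)).strftime("%Y-%m-%d")
now = datetime.datetime.utcnow().time()
if now < CSV_STARTUP:
    print 'Too early to verify backups for {d}'.format(d=yesterday)
elif now < CSV_COMPLETION_TIME:
    print 'Backups for {d} may still be running'.format(d=yesterday)
else:
    print 'Backups for {d} should be done; do the full checks'.format(d=yesterday)
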
def drop_db_after_rename(instance, dbs, verbose, dry_run):
    """ Drop the original empty db and a non-empty rename db

    Args:
    instance - a hostaddr object
    dbs -  a set of database names
    verbose - bool, will direct sql to stdout
    dry_run - bool, will make no changes to the servers
    """

    # confirm db is not in zk and not in use
    orphaned, _, _ = find_shard_mismatches.find_shard_mismatches(instance)
    instance_orphans = orphaned[instance.__str__()]
    unexpected = dbs.difference(instance_orphans)
    if unexpected:
        print ''.join(("Cowardly refusing to act on the following dbs: ",
                       ','.join(unexpected)))
        sys.exit(1)

    # make sure the original db is empty
    for db in dbs:
        if mysql_lib.get_tables(instance, db):
            print ''.join(("Cowardly refusing to drop non-empty db:",
                           db))
            sys.exit(1)

    conn = mysql_lib.connect_mysql(instance)
    cursor = conn.cursor()
    for db in dbs:
        # we should be good to drop the old empty dbs
        raw_sql = 'DROP DATABASE IF EXISTS `{db}`;'
        sql = raw_sql.format(db=db)
        if verbose:
            print sql
        if not dry_run:
            cursor.execute(sql)

        # and we should be ok to drop the non-empty 'dropme_' prepended db
        renamed_db = ''.join((DB_PREPEND, db))
        sql = raw_sql.format(db=renamed_db)
        if verbose:
            print sql
        if not dry_run:
            cursor.execute(sql)
def drop_db_after_rename(instance, dbs, verbose, dry_run):
    """ Drop the original empty db and a non-empty rename db

    Args:
    instance - a hostaddr object
    dbs -  a set of database names
    verbose - bool, will direct sql to stdout
    dry_run - bool, will make no changes to the servers
    """

    # confirm db is not in zk and not in use
    orphaned, _, _ = find_shard_mismatches.find_shard_mismatches(instance)
    instance_orphans = orphaned[instance.__str__()]
    unexpected = dbs.difference(instance_orphans)
    if unexpected:
        print ''.join(("Cowardly refusing to act on the following dbs: ",
                       ','.join(unexpected)))
        sys.exit(1)

    # make sure the original db is empty
    conn = mysql_lib.connect_mysql(instance)
    cursor = conn.cursor()
    for db in dbs:
        if mysql_lib.get_tables(conn, db):
            print ''.join(("Cowardly refusing to drop non-empty db:",
                           db))
            sys.exit(1)

    for db in dbs:
        # we should be good to drop the old empty dbs
        raw_sql = 'DROP DATABASE IF EXISTS `{db}`;'
        sql = raw_sql.format(db=db)
        if verbose:
            print sql
        if not dry_run:
            cursor.execute(sql)

        # and we should be ok to drop the non-empty 'dropme_' prepended db
        renamed_db = ''.join((DB_PREPEND, db))
        sql = raw_sql.format(db=renamed_db)
        if verbose:
            print sql
        if not dry_run:
            cursor.execute(sql)
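
Both versions of drop_db_after_rename assume the non-empty copy was renamed
with a fixed DB_PREPEND prefix ('dropme_' per the comments above); a tiny
sketch of that naming and the DROP statement it produces, with a placeholder
db name:

DB_PREPEND = 'dropme_'                   # value taken from the comments above
db = 'example_db'                        # placeholder
renamed_db = ''.join((DB_PREPEND, db))   # 'dropme_example_db'
print 'DROP DATABASE IF EXISTS `{db}`;'.format(db=renamed_db)
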
Example #9
    def get_tables_to_backup(self, db):
        """ Determine which tables should be backed up in a db

        Args:
        db -  The db for which we need a list of tables eligible for backup

        Returns:
        a set of table names
        """
        tables = environment_specific.filter_tables_to_csv_backup(
            self.instance, db,
            mysql_lib.get_tables(self.instance, db, skip_views=True))
        if not self.force_table:
            return tables

        if self.force_table not in tables:
            raise Exception('Requested table {t} is not available to backup'
                            ''.format(t=self.force_table))
        else:
            return set([self.force_table])
    def get_tables_to_backup(self, db):
        """ Determine which tables should be backed up in a db

        Args:
        db -  The db for which we need a list of tables eligible for backup

        Returns:
        a set of table names
        """
        tables = environment_specific.filter_tables_to_csv_backup(
                     self.instance, db,
                     mysql_lib.get_tables(self.instance, db, skip_views=True))
        if not self.force_table:
            return tables

        if self.force_table not in tables:
            raise Exception('Requested table {t} is not available to backup'
                            ''.format(t=self.force_table))
        else:
            return set([self.force_table])
    def __init__(self,
                 instance,
                 db=None,
                 force_table=None,
                 force_reupload=False,
                 dev_bucket=False):
        """ Init function for backup, takes all args

        Args:
        instance - A hostAddr object of the instance to be backed up
        db - (optional) backup only specified db
        force_table - (optional) backup only specified table
        force_reupload - (optional) force reupload of backup
        dev_bucket - (optional) upload to the dev S3 bucket instead
        """
        self.instance = instance
        self.session_id = None
        self.timestamp = datetime.datetime.utcnow()
        # datestamp is for s3 files which are by convention -1 day
        self.datestamp = (self.timestamp -
                          datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        self.tables_to_backup = multiprocessing.Queue()
        self.tables_to_retry = multiprocessing.Queue()
        if db:
            table_list = [
                '{}.{}'.format(db, x)
                for x in mysql_lib.get_tables(instance, db, True)
            ]
        else:
            table_list = mysql_lib.get_all_tables_by_instance(instance)

        for t in backup.filter_tables_to_csv_backup(instance, table_list):
            self.tables_to_backup.put(t)

        self.dev_bucket = dev_bucket
        self.force_table = force_table
        self.force_reupload = force_reupload
        self.table_count = 0
        self.upload_bucket = environment_specific.S3_CSV_BUCKET_DEV \
            if dev_bucket else environment_specific.S3_CSV_BUCKET
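
When a single db is requested, the __init__ above queues fully qualified
'db.table' names; a minimal sketch of that naming convention with placeholder
values (in the real code the table names come from mysql_lib.get_tables):

db = 'example_db'
tables = ['users', 'boards']
table_list = ['{}.{}'.format(db, x) for x in tables]
print table_list   # ['example_db.users', 'example_db.boards']
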
Example #12
def drop_db_after_rename(instance, dbs=None, dry_run=False):
    """ Drop the original empty db and a non-empty rename db

    Args:
        instance - a hostaddr object
        dbs -  a set of database names
        dry_run - bool, will make no changes to the servers
    """
    if not dbs:
        dbs = set()
        for db in mysql_lib.get_dbs(instance):
            if db.startswith(DB_PREPEND):
                dbs.add(db[len(DB_PREPEND):])

    # confirm db is not in zk and not in use
    orphaned, _, _ = find_shard_mismatches.find_shard_mismatches(instance)
    instance_orphans = orphaned[instance]
    unexpected = dbs.difference(instance_orphans)
    if unexpected:
        raise Exception('Cowardly refusing to act on the following '
                        'dbs: {}'.format(unexpected))

    # make sure the original db is empty
    for db in dbs:
        if mysql_lib.get_tables(instance, db):
            raise Exception('Cowardly refusing to drop non-empty '
                            'db: {}'.format(db))

    for db in dbs:
        renamed_db = ''.join((DB_PREPEND, db))
        if dry_run:
            log.info('dry_run is enabled, not dropping '
                     'dbs: {db} {renamed}'.format(db=db, renamed=renamed_db))
        else:
            mysql_lib.drop_db(instance, db)
            mysql_lib.drop_db(instance, renamed_db)
Example #13
def main():
    description = ("MySQL checksum wrapper\n\n"
                   "Wrapper of pt-table-checksum and pt-table-sync.\n"
                   "Defaults to checksumming 1/{k}th of databases on instance.\n"
                   "If diffs are found, use pt-table-sync to measure actual "
                   "divergence,\nbut only if the number of diffs is between "
                   "--min_diffs and --max_diffs.").format(k=DB_CHECK_FRACTION)

    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-i',
                        '--instance',
                        help='Instance to act on if other than localhost:3306',
                        default=''.join((socket.getfqdn(),
                                         ':3306')))
    parser.add_argument('-a',
                        '--all',
                        help='Checksums all dbs rather than the default',
                        action='store_true',
                        default=False)
    parser.add_argument('-d',
                        '--dbs',
                        help=("Comma separated list of db's to check rather "
                              "than the default"),
                        default=False)
    parser.add_argument('-q',
                        '--quiet',
                        help=("Do not print output to stdout"),
                        action='store_true',
                        default=False)
    parser.add_argument('-m',
                        '--min_diffs',
                        help=("Do per-row check if chunk diff count is at "
                              "least this value"),
                        dest='min_diffs',
                        default=MIN_DIFFS)
    parser.add_argument('-M',
                        '--max_diffs',
                        help=("Do not do per-row check if chunk diff count "
                              "is greater than this value"),
                        dest='max_diffs',
                        default=MAX_DIFFS)
    parser.add_argument('-C',
                        '--no_create_table',
                        help=("If test.checksum_detail is missing, do "
                              "not try to create it."),
                        dest='create_table',
                        action='store_false',
                        default=True)
    parser.add_argument('-v',
                        '--verbose',
                        help=("Store raw output from PT tools in the DB?"),
                        action='store_true',
                        default=False)
    parser.add_argument('-c',
                        '--check_fraction',
                        help=('Check this fraction of databases.'),
                        default=DB_CHECK_FRACTION)

    args = parser.parse_args()
    instance = host_utils.HostAddr(args.instance)
    zk = host_utils.MysqlZookeeper()

    if instance not in \
            zk.get_all_mysql_instances_by_type(host_utils.REPLICA_ROLE_MASTER):
        raise Exception("Instance is not a master in ZK")

    # If enabled, try to create the table that holds the checksum info.
    # If not enabled, make sure that the table exists.
    conn = mysql_lib.connect_mysql(instance, 'scriptro')
    if not mysql_lib.does_table_exist(conn, mysql_lib.METADATA_DB, CHECKSUM_TBL):
        if args.create_table:
            create_checksum_detail_table(instance)
        else:
            raise Exception("Checksum table not found.  Unable to continue."
                            "Consider not using the -C option or create it "
                            "yourself.")

    # Determine what replica set we belong to and get a list of slaves.
    replica_set = zk.get_replica_set_from_instance(instance)[0]
    slaves = set()
    for rtype in host_utils.REPLICA_ROLE_SLAVE, host_utils.REPLICA_ROLE_DR_SLAVE:
        s = zk.get_mysql_instance_from_replica_set(replica_set, rtype)
        if s:
            slaves.add(s)

    if len(slaves) == 0:
        log.info("This server has no slaves.  Nothing to do.")
        sys.exit(0)

    # before we even start this, make sure replication is OK.
    for slave in slaves:
        slave_conn = mysql_lib.connect_mysql(slave, 'scriptrw')
        ss = mysql_lib.get_slave_status(slave_conn)
        if ss['Slave_SQL_Running'] != "Yes" or ss['Slave_IO_Running'] != "Yes":
            raise Exception("Replication is NOT RUNNING on slave {s}: "
                            "SQL: {st} | IO: {it}".format(st=ss['Slave_SQL_Running'],
                                                          it=ss['Slave_IO_Running']))

    if args.dbs:
        db_to_check = set(args.dbs.split(','))
    else:
        dbs = mysql_lib.get_dbs(conn)

        if args.all:
            db_to_check = dbs
        else:
            # default behaviour, check a given DB every N days based on
            # day of year.  minimizes month-boundary issues.
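            # Worked example (numbers assumed): with check_fraction=7 and
            # day-of-year 45, check_modulus is 45 % 7 = 3, so only dbs whose
            # iteration position satisfies counter % 7 == 3 are checked today;
            # each db therefore gets checked roughly once every 7 days.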
            db_to_check = set()
            check_modulus = int(time.strftime("%j")) % int(args.check_fraction)
            counter = 0
            for db in dbs:
                modulus = counter % int(args.check_fraction)
                if modulus == check_modulus:
                    db_to_check.add(db)
                counter = counter + 1

    # Iterate through the list of DBs and check one table at a time.
    # We do it this way to ensure more coverage in case pt-table-checksum
    # loses its DB connection and errors out before completing a full scan
    # of a given database.
    #
    for db in db_to_check:
        conn = mysql_lib.connect_mysql(instance, 'scriptro')
        tables_to_check = mysql_lib.get_tables(conn, db, skip_views=True)
        for tbl in tables_to_check:
            c_cmd, c_out, c_err, c_ret = checksum_tbl(instance, db, tbl)
            if not args.quiet:
                log.info("Checksum command executed was:\n{cmd}".format(cmd=c_cmd))
                log.info("Standard out:\n{out}".format(out=c_out))
                log.info("Standard error:\n{err}".format(err=c_err))
                log.info("Return code: {ret}".format(ret=c_ret))

            # parse each line of STDOUT (there should only be one with
            # actual data).  We only care about errors, rows, chunks, and
            # skipped, since we'll need to figure out diffs separately for
            # each slave box.
            for line in c_out.split("\n"):
                results = parse_checksum_row(line)
                if results:
                    chunk_errors = int(results[1])
                    row_count = int(results[3])
                    chunk_count = int(results[4])
                    chunk_skips = int(results[5])

                    for slave in slaves:
                        rows_checked = 'NO'
                        sync_cmd = ""
                        sync_out = ""
                        sync_err = ""
                        sync_ret = -1
                        row_diffs = 0

                        elapsed_time_ms,\
                            chunk_diffs = check_one_replica(slave,
                                                            db, tbl)

                        # if we skipped some chunks or there were errors,
                        # this means we can't have complete information about the
                        # state of the replica. in the case of a hard error,
                        # we'll just stop.  in the case of a skipped chunk, we will
                        # treat it as a different chunk for purposes of deciding
                        # whether or not to do a more detailed analysis.
                        #
                        checkable_chunks = chunk_skips + chunk_diffs

                        if chunk_errors > 0:
                            checksum_status = 'ERRORS_IN_CHECKSUM_PROCESS'
                        elif checkable_chunks == 0:
                            checksum_status = 'GOOD'
                        else:
                            if checkable_chunks > int(args.max_diffs):
                                # too many chunk diffs, don't bother checking
                                # further.  not good.
                                checksum_status = 'TOO_MANY_CHUNK_DIFFS'
                            elif checkable_chunks < int(args.min_diffs):
                                # some diffs, but not enough that we care.
                                checksum_status = 'CHUNK_DIFFS_FOUND_BUT_OK'
                            else:
                                start_time = int(time.time()*1000)
                                rows_checked = 'YES'

                                # set the proper status - did we do a sync-based check
                                # because of explicit diffs or because of skipped chunks?
                                if chunk_diffs > 0:
                                    checksum_status = 'ROW_DIFFS_FOUND'
                                else:
                                    checksum_status = 'CHUNKS_WERE_SKIPPED'

                                sync_cmd, sync_out, sync_err, sync_ret, \
                                    row_diffs = checksum_tbl_via_sync(slave,
                                                                      db,
                                                                      tbl)

                                # Add in the time it took to do the sync.
                                elapsed_time_ms += int(time.time()*1000) - start_time

                                if not args.quiet:
                                    log.info("Sync command executed was:\n{cmd} ".format(cmd=sync_cmd))
                                    log.info("Standard out:\n {out}".format(out=sync_out))
                                    log.info("Standard error:\n {err}".format(err=sync_err))
                                    log.info("Return code: {ret}".format(ret=sync_ret))
                                    log.info("Row diffs found: {cnt}".format(cnt=row_diffs))

                        # Checksum process is complete, store the results.
                        #
                        data = {'instance': slave,
                                'master_instance': instance,
                                'db': db,
                                'tbl': tbl,
                                'elapsed_time_ms': elapsed_time_ms,
                                'chunk_count': chunk_count,
                                'chunk_errors': chunk_errors,
                                'chunk_diffs': chunk_diffs,
                                'chunk_skips': chunk_skips,
                                'row_count': row_count,
                                'row_diffs': row_diffs,
                                'rows_checked': rows_checked,
                                'checksum_status': checksum_status,
                                'checksum_cmd': None,
                                'checksum_stdout': None,
                                'checksum_stderr': None,
                                'checksum_rc': c_ret,
                                'sync_cmd': None,
                                'sync_stdout': None,
                                'sync_stderr': None,
                                'sync_rc': sync_ret}

                        if args.verbose:
                            data.update({'checksum_cmd': c_cmd,
                                         'checksum_stdout': c_out,
                                         'checksum_stderr': c_err,
                                         'sync_cmd': sync_cmd,
                                         'sync_stdout': sync_out,
                                         'sync_stderr': sync_err,
                                         'sync_rc': sync_ret})

                        write_checksum_status(instance, data)

        conn.close()
Example #14
            db_to_check = set()
            check_modulus = int(time.strftime("%j")) % int(args.check_fraction)
            counter = 0
            for db in dbs:
                modulus = counter % int(args.check_fraction)
                if modulus == check_modulus:
                    db_to_check.add(db)
                counter = counter + 1

    # Iterate through the list of DBs and check one table at a time.
    # We do it this way to ensure more coverage in case pt-table-checksum
    # loses its DB connection and errors out before completing a full scan
    # of a given database.
    #
    for db in db_to_check:
        tables_to_check = mysql_lib.get_tables(instance, db, skip_views=True)
        for tbl in tables_to_check:
            c_cmd, c_out, c_err, c_ret = checksum_tbl(instance, db, tbl)
            if not args.quiet:
                log.info(
                    "Checksum command executed was:\n{cmd}".format(cmd=c_cmd))
                log.info("Standard out:\n{out}".format(out=c_out))
                log.info("Standard error:\n{err}".format(err=c_err))
                log.info("Return code: {ret}".format(ret=c_ret))

            # parse each line of STDOUT (there should only be one with
            # actual data).  We only care about errors, rows, chunks, and
            # skipped, since we'll need to figure out diffs separately for
            # each slave box.
            for line in c_out.split("\n"):
                results = parse_checksum_row(line)
Example #15
def verify_flexsharded_csv_backup(shard_type, date, dev_bucket=False):
    """ Verify that a flexsharded data set has been backed up to hive

    Args:
        shard_type - e.g. 'commercefeeddb', etc.
        date - The date to search for
        dev_bucket - Look in the dev bucket?

    Returns:
        True for no problems found, False otherwise.
    """
    success = True
    replica_sets = set()
    zk = host_utils.MysqlZookeeper()

    # Figure out what replica sets to check based on a prefix
    for replica_set in zk.get_all_mysql_replica_sets():
        if replica_set.startswith(
                environment_specific.FLEXSHARD_DBS[shard_type]['zk_prefix']):
            replica_sets.add(replica_set)

    # Example schema host
    schema_host = zk.get_mysql_instance_from_replica_set(
        environment_specific.FLEXSHARD_DBS[shard_type]
        ['example_shard_replica_set'],
        repl_type=host_utils.REPLICA_ROLE_SLAVE)

    boto_conn = boto.connect_s3()
    bucket_name = environment_specific.S3_CSV_BUCKET_DEV if dev_bucket \
                    else environment_specific.S3_CSV_BUCKET
    bucket = boto_conn.get_bucket(bucket_name, validate=False)
    missing_uploads = set()

    for db in mysql_lib.get_dbs(schema_host):
        table_list = [
            '{}.{}'.format(db, x)
            for x in mysql_lib.get_tables(schema_host, db, True)
        ]
        table_tuples = backup.filter_tables_to_csv_backup(
            schema_host, table_list)
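        # From the usage below, each tuple appears to be of the form
        # (db_dot_table, ..., partition_number): t[0] is a 'db.table' string
        # and t[2] is passed as partition_number; the exact shape is inferred
        # from this code rather than documented here.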

        for t in table_tuples:
            try:
                verify_csv_schema_upload(schema_host,
                                         db, [t[0].split('.')[1]],
                                         date=date,
                                         dev_bucket=dev_bucket)
            except:
                continue

            table_missing_uploads = set()
            for replica_set in replica_sets:
                chk_instance = zk.get_mysql_instance_from_replica_set(
                    replica_set)
                (_, data_path, success_path) = backup.get_csv_backup_paths(
                    chk_instance,
                    db,
                    t[0].split('.')[1],
                    date=date,
                    partition_number=t[2])

                k = bucket.get_key(data_path)
                if k is None:
                    table_missing_uploads.add(data_path)
                    success = False
                elif k.size == 0:
                    # we should not have zero-length files, because even if
                    # we send zero bytes to lzop, there's a 55-byte header.
                    # so, if this actually happened, it probably means that
                    # something is wrong.  delete the key and add it to the
                    # missing_uploads list so that we'll try again.
                    k.delete()
                    table_missing_uploads.add(data_path)
                    success = False

            if not table_missing_uploads and not bucket.get_key(success_path):
                print 'Creating success key {b}/{k}'.format(b=bucket_name,
                                                            k=success_path)
                key = bucket.new_key(success_path)
                key.set_contents_from_string(' ')

            missing_uploads.update(table_missing_uploads)

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print('Shard type {} is missing uploads:'.format(shard_type))
            pprint.pprint(missing_uploads)
        else:
            print('Shard type {shard_type} is missing {num} uploads'
                  ''.format(num=len(missing_uploads), shard_type=shard_type))

    if not missing_uploads and success:
        print 'Shard type {} is backed up'.format(shard_type)

    return success
Example #16
def verify_unsharded_csv_backups(instance, date, dev_bucket=False):
    """ Verify csv backups for an instance which is not part of a sharded
        system

    Args:
        instance - The instance to inspect for backups being done
        date - The date to search for
        dev_bucket - Use the dev bucket?

    Returns:
        True for no problems found, False otherwise.
    """
    return_status = True
    boto_conn = boto.connect_s3()
    bucket_name = environment_specific.S3_CSV_BUCKET_DEV if dev_bucket \
                    else environment_specific.S3_CSV_BUCKET
    bucket = boto_conn.get_bucket(bucket_name, validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(instance):
        table_list = [
            '{}.{}'.format(db, x)
            for x in mysql_lib.get_tables(instance, db, True)
        ]
        table_tuples = backup.filter_tables_to_csv_backup(instance, table_list)
        try:
            verify_csv_schema_upload(
                instance, db, [x[0].split('.')[1] for x in table_tuples], date,
                dev_bucket)
        except Exception as e:
            print e
            return_status = False
            continue

        table_names = [x[0] for x in table_tuples]
        expected_partitions = dict(
            (x, table_names.count(x)) for x in table_names)
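        # e.g. if table_names is ['db1.users', 'db1.users', 'db1.pins'],
        # expected_partitions is {'db1.users': 2, 'db1.pins': 1}, i.e. the
        # number of partitioned CSV uploads expected per table.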
        found_partitions = dict()

        for t in table_tuples:
            (_, data_path, success_path) = \
                backup.get_csv_backup_paths(instance, *t[0].split('.'), date=date,
                                            partition_number=t[2])
            k = bucket.get_key(data_path)
            if k is None:
                missing_uploads.add(data_path)
            elif k.size == 0:
                # we should not have zero-length files, because even if
                # we send zero bytes to lzop, there's a 55-byte header.
                # so, if this actually happened, it probably means that
                # something is wrong.  delete the key and add it to the
                # missing_uploads list so that we'll try again.
                k.delete()
                missing_uploads.add(data_path)
            else:
                found_partitions[t[0]] = 1 + found_partitions.get(t[0], 0)

            # We still need to create a success file for the data
            # team for this table, even if something else is AWOL
            # later in the backup.
            s_key = bucket.get_key(success_path)
            if s_key:
                if found_partitions.get(t[0], 0) < expected_partitions[t[0]]:
                    print ('Success key {b}/{k} exists but it should '
                           'not - deleting it!'.format(b=bucket_name,
                                                       k=success_path))
                    s_key.delete()
                elif found_partitions.get(t[0],
                                          0) == expected_partitions[t[0]]:
                    print 'Success key {b}/{k} exists!'.format(b=bucket_name,
                                                               k=success_path)
            elif found_partitions.get(t[0], 0) == expected_partitions[t[0]]:
                print 'Creating success key {b}/{k}'.format(b=bucket_name,
                                                            k=success_path)
                key = bucket.new_key(success_path)
                key.set_contents_from_string(' ')

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print 'Missing uploads: {}'.format(missing_uploads)
        else:
            print 'Missing {} uploads'.format(len(missing_uploads))
        return_status = False

    if return_status:
        log_csv_backup_success(instance, date, dev_bucket)
    return return_status
Example #17
def verify_csv_instance_backup(instance, date, dev_bucket=False):
    """ Verify that an instance has been backed up to hive

    Args:
        instance - The instance to inspect for backups being done
        date - The date to search for
        dev_bucket - Check the dev bucket?

    Returns:
        True for no problems found, False otherwise.
    """
    return_status = True
    missing_uploads = set()

    if csv_backup_success_logged(instance, date, dev_bucket):
        print('Per csv backup success log, backup has already been '
              'verified')
        return True

    if early_verification(date, instance):
        return True

    # We might be looking at an instance that is part of a sharded system; if
    # so we will only look at what DBs are supposed to exist on the instance;
    # otherwise, we will check all DBs. Note, we only set the success flag
    # for unsharded systems.
    zk = host_utils.MysqlZookeeper()
    replica_set = zk.get_replica_set_from_instance(instance)
    shards = zk.get_shards_by_replica_set()[replica_set]

    if shards and not is_sharded_but_not_sharded(replica_set):
        instance_shard_type_mapping = dict()
        missing_uploads = set()
        for shard in shards:
            (s, ns, _) = environment_specific.deconstruct_shard_name(shard)
            shard_type = ''.join([s, ns])
            if shard_type not in instance_shard_type_mapping:
                instance_shard_type_mapping[shard_type] = set()
            instance_shard_type_mapping[shard_type].add(shard)

        for shard_type in instance_shard_type_mapping:
            example_shard = list(instance_shard_type_mapping[shard_type])[0]
            (_, db) = zk.map_shard_to_replica_and_db(example_shard)
            table_list = [
                '{}.{}'.format(db, x)
                for x in mysql_lib.get_tables(instance, db, True)
            ]
            table_tuples = backup.filter_tables_to_csv_backup(
                instance, table_list)
            missing_uploads.update(
                verify_sharded_csv_backup_by_shards(
                    instance_shard_type_mapping[shard_type], table_tuples,
                    date, dev_bucket)[1])

        if missing_uploads:
            if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
                print('Instance {} is missing uploads:'.format(instance))
                pprint.pprint(missing_uploads)
            else:
                print('Instance {instance} is missing {num} uploads'
                      ''.format(num=len(missing_uploads), instance=instance))
            return_status = False
    else:
        return_status = verify_unsharded_csv_backups(instance, date,
                                                     dev_bucket)

    if return_status:
        print('Instance {} is backed up'.format(instance))
    return return_status
Example #18
def verify_sharded_csv_backup_by_shard_type(shard_type,
                                            date,
                                            dev_bucket=False):
    """ Verify that a sharded data set has been backed up to hive

    Args:
        shard_type - e.g. 'sharddb', etc.
        date - The date to search for
        dev_bucket - Look in the dev bucket

    Returns:
        True for no problems found, False otherwise.
    """
    zk = host_utils.MysqlZookeeper()
    (replica_set,
     db) = zk.get_example_db_and_replica_set_for_shard_type(shard_type)
    schema_host = zk.get_mysql_instance_from_replica_set(
        replica_set, repl_type=host_utils.REPLICA_ROLE_SLAVE)

    table_list = [
        '{}.{}'.format(db, x)
        for x in mysql_lib.get_tables(schema_host, db, True)
    ]
    table_tuples = backup.filter_tables_to_csv_backup(schema_host, table_list)

    if not table_tuples:
        raise Exception('No tables will be checked for backups')

    verify_csv_schema_upload(schema_host, db,
                             [x[0].split('.')[1] for x in table_tuples], date,
                             dev_bucket)

    shards = zk.get_shards_by_shard_type(shard_type)
    if not shards:
        raise Exception('No shards will be checked for backups')

    (finished_uploads, missing_uploads) = verify_sharded_csv_backup_by_shards(
        shards, table_tuples, date, dev_bucket)

    if finished_uploads:
        boto_conn = boto.connect_s3()
        bucket_name = environment_specific.S3_CSV_BUCKET_DEV \
            if dev_bucket else environment_specific.S3_CSV_BUCKET
        bucket = boto_conn.get_bucket(bucket_name, validate=False)
        for tbl in finished_uploads:
            (_, _, success_path) = backup.get_csv_backup_paths(
                schema_host, db, tbl, date)
            if not bucket.get_key(success_path):
                print 'Creating success key {b}/{k}'.format(b=bucket_name,
                                                            k=success_path)
                key = bucket.new_key(success_path)
                key.set_contents_from_string(' ')

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print('Shard type {} is missing uploads:'.format(shard_type))
            pprint.pprint(missing_uploads)
        else:
            print('Shard type {shard_type} is missing {num} uploads'
                  ''.format(num=len(missing_uploads), shard_type=shard_type))
        return False
    else:
        # we have checked all shards, all are good, create success files
        # that might not already be present.  theoretically, everything here
        # will get picked up by the finished_uploads stanza earlier, but
        # we have this here as a failsafe.
        boto_conn = boto.connect_s3()
        bucket_name = environment_specific.S3_CSV_BUCKET_DEV \
            if dev_bucket else environment_specific.S3_CSV_BUCKET
        bucket = boto_conn.get_bucket(bucket_name, validate=False)
        for t in table_tuples:
            (_, _,
             success_path) = backup.get_csv_backup_paths(schema_host,
                                                         *t[0].split('.'),
                                                         date=date,
                                                         partition_number=t[2])
            if not bucket.get_key(success_path):
                print 'Creating success key {b}/{k}'.format(b=bucket_name,
                                                            k=success_path)
                key = bucket.new_key(success_path)
                key.set_contents_from_string(' ')
        print 'Shard type {} is backed up'.format(shard_type)

    return True