def verify_unsharded_csv_backup(shard_type, date, instance): """ Verify that a non-sharded db has been backed up to hive Args: shard_type - In this case, a hostname prefix date - The date to search for instance - The actual instance to inspect for backups being done Returns True for no problems found, False otherwise. """ return_status = True boto_conn = boto.connect_s3() bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False) missing_uploads = set() for db in mysql_lib.get_dbs(instance): tables = mysql_backup_csv.mysql_backup_csv( instance).get_tables_to_backup(db) for table in tables: if not verify_csv_schema_upload(shard_type, date, instance, db, set([table])): return_status = False print 'Missing schema for {db}.{table}'.format(db=db, table=table) continue (_, data_path, success_path) = \ environment_specific.get_csv_backup_paths(date, db, table, instance.replica_type, instance.get_zk_replica_set()[0]) if not bucket.get_key(data_path): missing_uploads.add(data_path) else: # we still need to create a success file for the data # team for this table, even if something else is AWOL # later in the backup. if bucket.get_key(success_path): print 'Key already exists {key}'.format(key=success_path) else: print 'Creating success key {key}'.format(key=success_path) key = bucket.new_key(success_path) key.set_contents_from_string('') if missing_uploads: if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT: print 'Missing uploads: {uploads}'.format(uploads=missing_uploads) else: print 'Missing {num} uploads'.format(num=len(missing_uploads)) return_status = False return return_status
def verify_unsharded_csv_backup(shard_type, date, instance):
    """ Verify that a non-sharded db has been backed up to hive

    Args:
    shard_type - In this case, a hostname prefix
    date - The date to search for
    instance - The actual instance to inspect for backups being done

    Returns True for no problems found, False otherwise.
    """
    # NOTE(review): this function is defined twice in this file; at import
    # time the later definition silently shadows the earlier one. The two
    # copies appear token-identical - deduplicate.
    return_status = True
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(instance):
        tables = mysql_backup_csv.mysql_backup_csv(instance).get_tables_to_backup(db)
        for table in tables:
            # A missing schema upload makes the data upload unusable, so
            # skip the data/success-key checks for this table.
            if not verify_csv_schema_upload(shard_type, date, instance, db,
                                            set([table])):
                return_status = False
                print 'Missing schema for {db}.{table}'.format(db=db,
                                                               table=table)
                continue
            (_, data_path, success_path) = \
                environment_specific.get_csv_backup_paths(
                    date, db, table, instance.replica_type,
                    instance.get_zk_replica_set()[0])
            if not bucket.get_key(data_path):
                missing_uploads.add(data_path)
            else:
                # we still need to create a success file for the data
                # team for this table, even if something else is AWOL
                # later in the backup.
                if bucket.get_key(success_path):
                    print 'Key already exists {key}'.format(key=success_path)
                else:
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')
    if missing_uploads:
        # Only print the full path set when it is short enough to be useful.
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print 'Missing uploads: {uploads}'.format(uploads=missing_uploads)
        else:
            print 'Missing {num} uploads'.format(num=len(missing_uploads))
        return_status = False
    return return_status
def verify_flexsharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a flexsharded data set has been backed up to hive

    Args:
    shard_type - i.e. 'commercefeeddb', etc
    date - The date to search for
    instance - Restrict the search to problem on a single instnace

    Returns True for no problems found, False otherwise.
    """
    success = True
    replica_sets = set()
    zk = host_utils.MysqlZookeeper()
    if instance:
        # Restrict the check to the replica set holding this instance.
        replica_sets.add(zk.get_replica_set_from_instance(instance)[0])
    else:
        for replica_set in zk.get_all_mysql_replica_sets():
            if replica_set.startswith(
                    environment_specific.FLEXSHARD_DBS[shard_type]
                    ['zk_prefix']):
                replica_sets.add(replica_set)

    # One example replica set is used as the reference for which dbs and
    # tables should exist everywhere.
    schema_host = zk.get_mysql_instance_from_replica_set(
        environment_specific.FLEXSHARD_DBS[shard_type]
        ['example_shard_replica_set'],
        repl_type=host_utils.REPLICA_ROLE_SLAVE)

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(schema_host):
        for table in mysql_backup_csv.mysql_backup_csv(
                schema_host).get_tables_to_backup(db):
            # Without a schema upload the data is unusable; skip the
            # per-replica-set data checks for this table.
            if not verify_csv_schema_upload(shard_type, date, schema_host,
                                            db, [table]):
                success = False
                continue
            table_missing_uploads = set()
            for replica_set in replica_sets:
                chk_instance = zk.get_mysql_instance_from_replica_set(
                    replica_set)
                (_, data_path,
                 success_path) = environment_specific.get_csv_backup_paths(
                    date, db, table, chk_instance.replica_type,
                    chk_instance.get_zk_replica_set()[0])
                if not bucket.get_key(data_path):
                    table_missing_uploads.add(data_path)
                    success = False

            # NOTE(review): success_path here is the value from the LAST
            # iteration of the replica_set loop above; if replica_sets is
            # empty this raises NameError. Presumably all replica sets
            # share one success path for a table - confirm.
            if not table_missing_uploads and not instance:
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')

            missing_uploads.update(table_missing_uploads)

    if missing_uploads:
        # Only print the full path set when it is short enough to be useful.
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print('Shard type {shard_type} is missing uploads:'
                  ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print('Shard type {shard_type} is missing {num} uploads'
                  ''.format(num=len(missing_uploads),
                            shard_type=shard_type))

    if not missing_uploads and not instance and success:
        print 'Shard type {shard_type} is backed up'.format(
            shard_type=shard_type)

    return success
def verify_sharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a sharded data set has been backed up to hive

    Args:
    shard_type - i.e. 'sharddb', etc
    date - The date to search for
    instance - Restrict the search to problem on a single instnace

    Returns True for no problems found, False otherwise.
    """
    zk = host_utils.MysqlZookeeper()
    # One example shard supplies the reference schema/table list for the
    # whole shard type.
    example_shard = environment_specific.SHARDED_DBS_PREFIX_MAP[shard_type][
        'example_shard']
    schema_host = zk.shard_to_instance(example_shard,
                                       repl_type=host_utils.REPLICA_ROLE_SLAVE)
    tables = mysql_backup_csv.mysql_backup_csv(
        schema_host).get_tables_to_backup(
        environment_specific.convert_shard_to_db(example_shard))
    success = verify_csv_schema_upload(
        shard_type, date, schema_host,
        environment_specific.convert_shard_to_db(example_shard), tables)
    if instance:
        # Only check the shards living on this instance's replica set
        # (keyed by the master's string form in the host->shard map).
        host_shard_map = zk.get_host_shard_map()
        (replica_set, replica_type) = zk.get_replica_set_from_instance(instance)
        master = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_MASTER)
        shards = host_shard_map[master.__str__()]
    else:
        shards = zk.get_shards_by_shard_type(shard_type)

    # Fan the per-table checks out across worker processes.
    pool = multiprocessing.Pool(processes=CSV_CHECK_PROCESSES)
    pool_args = list()
    if not tables:
        raise Exception('No tables will be checked for backups')
    if not shards:
        raise Exception('No shards will be checked for backups')

    for table in tables:
        pool_args.append((table, shard_type, date, shards))
    results = pool.map(get_missing_uploads, pool_args)
    missing_uploads = set()
    for result in results:
        missing_uploads.update(result)

    if missing_uploads or not success:
        # Only print the full path set when it is short enough to be useful.
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print('Shard type {shard_type} is missing uploads:'
                  ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print('Shard type {shard_type} is missing {num} uploads'
                  ''.format(num=len(missing_uploads),
                            shard_type=shard_type))
        return False
    else:
        if instance:
            print 'Instance {instance} is backed up'.format(instance=instance)
        else:
            # we have checked all shards, all are good, create success files
            boto_conn = boto.connect_s3()
            bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                          validate=False)
            for table in tables:
                # NOTE(review): get_csv_backup_paths is called with four
                # positional args here but with five (including a replica
                # type) in the unsharded/flexsharded checks - confirm the
                # signature supports both call shapes.
                (_, _,
                 success_path) = environment_specific.get_csv_backup_paths(
                    date,
                    environment_specific.convert_shard_to_db(example_shard),
                    table, shard_type)
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')
            print 'Shard type {shard_type} is backed up'.format(
                shard_type=shard_type)
        return True
def verify_flexsharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a flexsharded data set has been backed up to hive

    Args:
    shard_type - i.e. 'commercefeeddb', etc
    date - The date to search for
    instance - Restrict the search to problem on a single instnace

    Returns True for no problems found, False otherwise.
    """
    # NOTE(review): this function is defined twice in this file; at import
    # time the later definition silently shadows the earlier one -
    # deduplicate.
    success = True
    replica_sets = set()
    zk = host_utils.MysqlZookeeper()
    if instance:
        # Restrict the check to the replica set holding this instance.
        replica_sets.add(zk.get_replica_set_from_instance(instance)[0])
    else:
        for replica_set in zk.get_all_mysql_replica_sets():
            if replica_set.startswith(
                    environment_specific.FLEXSHARD_DBS[shard_type]['zk_prefix']):
                replica_sets.add(replica_set)

    # One example replica set is used as the reference for which dbs and
    # tables should exist everywhere.
    schema_host = zk.get_mysql_instance_from_replica_set(
        environment_specific.FLEXSHARD_DBS[shard_type]['example_shard_replica_set'],
        repl_type=host_utils.REPLICA_ROLE_SLAVE)
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(schema_host):
        for table in mysql_backup_csv.mysql_backup_csv(
                schema_host).get_tables_to_backup(db):
            # Without a schema upload the data is unusable; skip the
            # per-replica-set data checks for this table.
            if not verify_csv_schema_upload(shard_type, date, schema_host,
                                            db, [table]):
                success = False
                continue
            table_missing_uploads = set()
            for replica_set in replica_sets:
                chk_instance = zk.get_mysql_instance_from_replica_set(
                    replica_set)
                (_, data_path,
                 success_path) = environment_specific.get_csv_backup_paths(
                    date, db, table, chk_instance.replica_type,
                    chk_instance.get_zk_replica_set()[0])
                if not bucket.get_key(data_path):
                    table_missing_uploads.add(data_path)
                    success = False

            # NOTE(review): success_path here is the value from the LAST
            # iteration of the replica_set loop above; if replica_sets is
            # empty this raises NameError. Presumably all replica sets
            # share one success path for a table - confirm.
            if not table_missing_uploads and not instance:
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')

            missing_uploads.update(table_missing_uploads)

    if missing_uploads:
        # Only print the full path set when it is short enough to be useful.
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print ('Shard type {shard_type} is missing uploads:'
                   ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print ('Shard type {shard_type} is missing {num} uploads'
                   ''.format(num=len(missing_uploads),
                             shard_type=shard_type))

    if not missing_uploads and not instance and success:
        print 'Shard type {shard_type} is backed up'.format(
            shard_type=shard_type)

    return success
def verify_sharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a sharded data set has been backed up to hive

    Args:
    shard_type - i.e. 'sharddb', etc
    date - The date to search for
    instance - Restrict the search to problem on a single instnace

    Returns True for no problems found, False otherwise.
    """
    # NOTE(review): this function is defined twice in this file; at import
    # time the later definition silently shadows the earlier one -
    # deduplicate.
    zk = host_utils.MysqlZookeeper()
    # One example shard supplies the reference schema/table list for the
    # whole shard type.
    example_shard = environment_specific.SHARDED_DBS_PREFIX_MAP[shard_type][
        'example_shard']
    schema_host = zk.shard_to_instance(example_shard,
                                       repl_type=host_utils.REPLICA_ROLE_SLAVE)
    tables = mysql_backup_csv.mysql_backup_csv(
        schema_host).get_tables_to_backup(
        environment_specific.convert_shard_to_db(example_shard))
    success = verify_csv_schema_upload(
        shard_type, date, schema_host,
        environment_specific.convert_shard_to_db(example_shard), tables)
    if instance:
        # Only check the shards living on this instance's replica set
        # (keyed by the master's string form in the host->shard map).
        host_shard_map = zk.get_host_shard_map()
        (replica_set, replica_type) = zk.get_replica_set_from_instance(instance)
        master = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_MASTER)
        shards = host_shard_map[master.__str__()]
    else:
        shards = zk.get_shards_by_shard_type(shard_type)

    # Fan the per-table checks out across worker processes.
    pool = multiprocessing.Pool(processes=CSV_CHECK_PROCESSES)
    pool_args = list()
    if not tables:
        raise Exception('No tables will be checked for backups')
    if not shards:
        raise Exception('No shards will be checked for backups')

    for table in tables:
        pool_args.append((table, shard_type, date, shards))
    results = pool.map(get_missing_uploads, pool_args)
    missing_uploads = set()
    for result in results:
        missing_uploads.update(result)

    if missing_uploads or not success:
        # Only print the full path set when it is short enough to be useful.
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print ('Shard type {shard_type} is missing uploads:'
                   ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print ('Shard type {shard_type} is missing {num} uploads'
                   ''.format(num=len(missing_uploads),
                             shard_type=shard_type))
        return False
    else:
        if instance:
            print 'Instance {instance} is backed up'.format(instance=instance)
        else:
            # we have checked all shards, all are good, create success files
            boto_conn = boto.connect_s3()
            bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                          validate=False)
            for table in tables:
                # NOTE(review): get_csv_backup_paths is called with four
                # positional args here but with five (including a replica
                # type) in the unsharded/flexsharded checks - confirm the
                # signature supports both call shapes.
                (_, _,
                 success_path) = environment_specific.get_csv_backup_paths(
                    date,
                    environment_specific.convert_shard_to_db(example_shard),
                    table, shard_type)
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')
            print 'Shard type {shard_type} is backed up'.format(
                shard_type=shard_type)
        return True