Code example #1
def verify_csv_schema_upload(shard_type, date, instance, schema_db, tables):
    """ Confirm that schema files are uploaded

    Args:
    shard_type - In this case, a hostname or shard type (generally
                 one and the same)
    date - The date to search for
    instance - The instance to examine to find which tables should exist
    schema_db - Which db to inspect on the instance
    tables - A set of which tables to check in schema_db for schema upload

    Returns True for no problems found, False otherwise.
    """
    return_status = True
    missing = set()
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    for table in tables:
        (path, _, _) = environment_specific.get_csv_backup_paths(
            date, schema_db, table, instance.replica_type,
            instance.get_zk_replica_set()[0])
        if not bucket.get_key(path):
            missing.add(path)
            return_status = False

    if missing:
        print 'Expected schema files are missing: {missing}'.format(
            missing=missing)
    return return_status
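
A minimal invocation sketch for the function above; it assumes the surrounding modules (host_utils, environment_specific, boto) are importable, and the replica set name, shard_type label, date string, db, and table names are hypothetical placeholders.

# Hypothetical invocation: replica set, shard_type, date format, db and table
# names are placeholders, not values from a real environment.
import host_utils

zk = host_utils.MysqlZookeeper()
instance = zk.get_mysql_instance_from_replica_set(
    'examplereplicaset001', repl_type=host_utils.REPLICA_ROLE_SLAVE)
if not verify_csv_schema_upload('exampledb', '2016-01-01', instance,
                                'exampledb001', set(['users', 'events'])):
    print 'Expected schema uploads are missing'
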
Code example #2
def verify_csv_schema_upload(shard_type, date, instance, schema_db,
                             tables):
    """ Confirm that schema files are uploaded

    Args:
    shard_type - In this case, a hostname or shard type (generally
                 one and the same)
    date - The date to search for
    instance - The instance to examine to find which tables should exist
    schema_db - Which db to inspect on the instance
    tables - A set of which tables to check in schema_db for schema upload

    Returns True for no problems found, False otherwise.
    """
    return_status = True
    missing = set()
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False)
    for table in tables:
        (path, _, _) = environment_specific.get_csv_backup_paths(
                           date, schema_db, table,
                           instance.replica_type,
                           instance.get_zk_replica_set()[0])
        if not bucket.get_key(path):
            missing.add(path)
            return_status = False

    if missing:
        print 'Expected schema files are missing: {missing}'.format(missing=missing)
    return return_status
Code example #3
def verify_unsharded_csv_backup(shard_type, date, instance):
    """ Verify that a non-sharded db has been backed up to hive

    Args:
    shard_type - In this case, a hostname prefix
    date - The date to search for
    instance - The actual instance to inspect for backups being done

    Returns True for no problems found, False otherwise.
    """
    return_status = True
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(instance):
        tables = mysql_backup_csv.mysql_backup_csv(
            instance).get_tables_to_backup(db)
        for table in tables:
            if not verify_csv_schema_upload(shard_type, date, instance, db,
                                            set([table])):
                return_status = False
                print 'Missing schema for {db}.{table}'.format(db=db,
                                                               table=table)
                continue

            (_, data_path, success_path) = \
                environment_specific.get_csv_backup_paths(date, db, table,
                                                          instance.replica_type,
                                                          instance.get_zk_replica_set()[0])
            if not bucket.get_key(data_path):
                missing_uploads.add(data_path)
            else:
                # we still need to create a success file for the data
                # team for this table, even if something else is AWOL
                # later in the backup.
                if bucket.get_key(success_path):
                    print 'Key already exists {key}'.format(key=success_path)
                else:
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print 'Missing uploads: {uploads}'.format(uploads=missing_uploads)
        else:
            print 'Missing {num} uploads'.format(num=len(missing_uploads))
        return_status = False

    return return_status
Code example #4
def verify_unsharded_csv_backup(shard_type, date, instance):
    """ Verify that a non-sharded db has been backed up to hive

    Args:
    shard_type - In this case, a hostname prefix
    date - The date to search for
    instance - The actual instance to inspect for backups being done

    Returns True for no problems found, False otherwise.
    """
    return_status = True
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False)
    missing_uploads = set()
    for db in mysql_lib.get_dbs(instance):
        tables = mysql_backup_csv.mysql_backup_csv(instance).get_tables_to_backup(db)
        for table in tables:
            if not verify_csv_schema_upload(shard_type, date, instance, db,
                                            set([table])):
                return_status = False
                print 'Missing schema for {db}.{table}'.format(db=db,
                                                               table=table)
                continue

            (_, data_path, success_path) = \
                environment_specific.get_csv_backup_paths(date, db, table,
                                                          instance.replica_type,
                                                          instance.get_zk_replica_set()[0])
            if not bucket.get_key(data_path):
                missing_uploads.add(data_path)
            else:
                # we still need to create a success file for the data
                # team for this table, even if something else is AWOL
                # later in the backup.
                if bucket.get_key(success_path):
                    print 'Key already exists {key}'.format(key=success_path)
                else:
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print 'Missing uploads: {uploads}'.format(uploads=missing_uploads)
        else:
            print 'Missing {num} uploads'.format(num=len(missing_uploads))
        return_status = False

    return return_status
Code example #5
    def already_backed_up(self, db):
        """ Check to see if a db has already been uploaded to s3

        Args:
        db - The db to check for being backed up

        Returns:
        bool - True if the db has already been backed up, False otherwise
        """
        boto_conn = boto.connect_s3()
        bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
        for table in self.get_tables_to_backup(db):
            (_, data_path, _) = environment_specific.get_csv_backup_paths(
                self.datestamp, db, table, self.instance.replica_type,
                self.instance.get_zk_replica_set()[0])
            if not bucket.get_key(data_path):
                return False
        return True
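
A hedged sketch of how already_backed_up might be used to skip databases whose dumps are already in s3; it assumes the method belongs to the mysql_backup_csv class seen elsewhere in these examples and that instance, mysql_lib, and log are available as in those examples. The skip loop itself is illustrative.

# Illustrative only: skip any db whose CSV data files already exist in s3.
backup = mysql_backup_csv.mysql_backup_csv(instance)
for db in mysql_lib.get_dbs(instance):
    if backup.already_backed_up(db):
        log.debug('{db} is already backed up, skipping'.format(db=db))
        continue
    # ... otherwise dump and upload the db's tables here ...
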
Code example #6
    def already_backed_up(self, db):
        """ Check to see if a db has already been uploaded to s3

        Args:
        db - The db to check for being backed up

        Returns:
        bool - True if the db has already been backed up, False otherwise
        """
        boto_conn = boto.connect_s3()
        bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
        for table in self.get_tables_to_backup(db):
            (_, data_path, _) = environment_specific.get_csv_backup_paths(
                                           self.datestamp, db, table,
                                           self.instance.replica_type,
                                           self.instance.get_zk_replica_set()[0])
            if not bucket.get_key(data_path):
                return False
        return True
Code example #7
def get_missing_uploads(args):
    """ Check to see if all backups are present

    Args: A tuple which can be expanded to:
    table - table name
    shard_type - sharddb, etc.
    date - The date to search for
    shards - a set of shards

    Returns: a set of s3 data paths which are not backed up
    """
    (table, shard_type, date, shards) = args
    expected_s3_keys = set()
    prefix = None

    for shard in shards:
        (_, data_path, _) = environment_specific.get_csv_backup_paths(
            date, environment_specific.convert_shard_to_db(shard), table,
            shard_type)
        expected_s3_keys.add(data_path)
        if not prefix:
            prefix = os.path.dirname(data_path)

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    uploaded_keys = set()
    for key in bucket.list(prefix=prefix):
        uploaded_keys.add(key.name)

    missing_uploads = expected_s3_keys.difference(uploaded_keys)

    for entry in copy.copy(missing_uploads):
        # the list api occasionally has issues, so we will recheck any missing
        # entries. If any are actually missing we will quit checking because
        # there is definitely work that needs to be done
        if bucket.get_key(entry):
            print 'List method erroneously did not return data for key: {entry}'.format(
                entry=entry)
            missing_uploads.discard(entry)
        else:
            return missing_uploads

    return missing_uploads
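
Code examples #13 and #16 below drive this function through a multiprocessing pool, one work item per table; the trimmed sketch here shows only that calling pattern and assumes tables, shard_type, date, shards, and CSV_CHECK_PROCESSES are defined as in those examples.

# One argument tuple per table; each worker checks every shard for that table
# and returns the set of missing s3 data paths.
import multiprocessing

pool = multiprocessing.Pool(processes=CSV_CHECK_PROCESSES)
pool_args = [(table, shard_type, date, shards) for table in tables]
results = pool.map(get_missing_uploads, pool_args)

missing_uploads = set()
for result in results:
    missing_uploads.update(result)
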
Code example #8
    def upload_schema(self, db, table, tmp_dir_db):
        """ Upload the schema of a table to s3

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
        """
        (schema_path, _, _) = environment_specific.get_csv_backup_paths(
            self.datestamp, db, table, self.instance.replica_type,
            self.instance.get_zk_replica_set()[0])
        create_stm = mysql_lib.show_create_table(self.instance, db, table)
        log.debug('{proc_id}: Uploading schema to {schema_path}'
                  ''.format(schema_path=schema_path,
                            proc_id=multiprocessing.current_process().name))
        boto_conn = boto.connect_s3()
        bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
        key = bucket.new_key(schema_path)
        key.set_contents_from_string(create_stm)
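
For completeness, a hedged sketch of reading an uploaded schema back with the same boto S3 API; datestamp, db, table, replica_type, and replica_set are assumed to be in scope, and the bucket name mirrors the other examples.

# Illustrative only: fetch a previously uploaded CREATE TABLE statement.
boto_conn = boto.connect_s3()
bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                              validate=False)
(schema_path, _, _) = environment_specific.get_csv_backup_paths(
    datestamp, db, table, replica_type, replica_set)
key = bucket.get_key(schema_path)
if key:
    create_stm = key.get_contents_as_string()
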
Code example #9
    def upload_schema(self, db, table, tmp_dir_db):
        """ Upload the schema of a table to s3

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
        """
        (schema_path, _, _) = environment_specific.get_csv_backup_paths(
                                     self.datestamp, db, table,
                                     self.instance.replica_type,
                                     self.instance.get_zk_replica_set()[0])
        create_stm = mysql_lib.show_create_table(self.instance, db, table)
        log.debug('{proc_id}: Uploading schema to {schema_path}'
                  ''.format(schema_path=schema_path,
                            proc_id=multiprocessing.current_process().name))
        boto_conn = boto.connect_s3()
        bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
        key = bucket.new_key(schema_path)
        key.set_contents_from_string(create_stm)
Code example #10
def get_missing_uploads(args):
    """ Check to see if all backups are present

    Args: A tuple which can be expanded to:
    table - table name
    shard_type - sharddb, etc.
    date - The date to search for
    shards - a set of shards

    Returns: a set of s3 data paths which are not backed up
    """
    (table, shard_type, date, shards) = args
    expected_s3_keys = set()
    prefix = None

    for shard in shards:
        (_, data_path, _) = environment_specific.get_csv_backup_paths(
                                date, environment_specific.convert_shard_to_db(shard),
                                table, shard_type)
        expected_s3_keys.add(data_path)
        if not prefix:
            prefix = os.path.dirname(data_path)

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False)
    uploaded_keys = set()
    for key in bucket.list(prefix=prefix):
        uploaded_keys.add(key.name)

    missing_uploads = expected_s3_keys.difference(uploaded_keys)

    for entry in copy.copy(missing_uploads):
        # the list api occasionally has issues, so we will recheck any missing
        # entries. If any are actually missing we will quit checking because
        # there is definitely work that needs to be done
        if bucket.get_key(entry):
            print 'List method erroneously did not return data for key: {entry}'.format(entry=entry)
            missing_uploads.discard(entry)
        else:
            return missing_uploads

    return missing_uploads
Code example #11
    def mysql_backup_csv_table(self, db, table, tmp_dir_db, conn):
        """ Back up a single table of a single db

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
        conn - a connection to the mysql instance
        """
        proc_id = multiprocessing.current_process().name
        (_, data_path, _) = environment_specific.get_csv_backup_paths(
                                self.datestamp, db, table,
                                self.instance.replica_type,
                                self.instance.get_zk_replica_set()[0])
        log.debug('{proc_id}: {db}.{table} dump to {path} started'
                  ''.format(proc_id=proc_id,
                            db=db,
                            table=table,
                            path=data_path))
        self.upload_schema(db, table, tmp_dir_db)
        fifo = os.path.join(tmp_dir_db, table)
        procs = dict()
        try:
            # giant try so we can try to clean things up in case of errors
            self.create_fifo(fifo)

            # Start creating processes
            procs['cat'] = subprocess.Popen(['cat', fifo],
                                            stdout=subprocess.PIPE)
            procs['nullescape'] = subprocess.Popen(['nullescape'],
                                                   stdin=procs['cat'].stdout,
                                                   stdout=subprocess.PIPE)
            procs['lzop'] = subprocess.Popen(['lzop'],
                                             stdin=procs['nullescape'].stdout,
                                             stdout=subprocess.PIPE)

            # Start dump query
            return_value = set()
            query_thread = threading.Thread(target=self.run_dump_query,
                                            args=(db, table, fifo,
                                                  conn, procs['cat'], return_value))
            query_thread.daemon = True
            query_thread.start()

            # And run the upload
            safe_uploader.safe_upload(precursor_procs=procs,
                                      stdin=procs['lzop'].stdout,
                                      bucket=self.upload_bucket,
                                      key=data_path,
                                      check_func=self.check_dump_success,
                                      check_arg=return_value)
            os.remove(fifo)
            log.debug('{proc_id}: {db}.{table} clean up complete'
                      ''.format(proc_id=proc_id,
                                db=db,
                                table=table))
        except:
            log.debug('{proc_id}: in exception handling for failed table upload'
                      ''.format(proc_id=proc_id))

            if os.path.exists(fifo):
                self.cleanup_fifo(fifo)

            safe_uploader.kill_precursor_procs(procs)
            raise
Code example #12
def verify_flexsharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a flexsharded data set has been backed up to hive

    Args:
    shard_type -  i.e. 'commercefeeddb', etc
    date - The date to search for
    instance - Restrict the search to problems on a single instance

    Returns True for no problems found, False otherwise.
    """
    success = True
    replica_sets = set()
    zk = host_utils.MysqlZookeeper()
    if instance:
        replica_sets.add(zk.get_replica_set_from_instance(instance)[0])
    else:
        for replica_set in zk.get_all_mysql_replica_sets():
            if replica_set.startswith(
                    environment_specific.FLEXSHARD_DBS[shard_type]
                ['zk_prefix']):
                replica_sets.add(replica_set)

    schema_host = zk.get_mysql_instance_from_replica_set(
        environment_specific.FLEXSHARD_DBS[shard_type]
        ['example_shard_replica_set'],
        repl_type=host_utils.REPLICA_ROLE_SLAVE)

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    missing_uploads = set()

    for db in mysql_lib.get_dbs(schema_host):
        for table in mysql_backup_csv.mysql_backup_csv(
                schema_host).get_tables_to_backup(db):
            if not verify_csv_schema_upload(shard_type, date, schema_host, db,
                                            [table]):
                success = False
                continue

            table_missing_uploads = set()
            for replica_set in replica_sets:
                chk_instance = zk.get_mysql_instance_from_replica_set(
                    replica_set)
                (_, data_path,
                 success_path) = environment_specific.get_csv_backup_paths(
                     date, db, table, chk_instance.replica_type,
                     chk_instance.get_zk_replica_set()[0])
                if not bucket.get_key(data_path):
                    table_missing_uploads.add(data_path)
                    success = False

            if not table_missing_uploads and not instance:
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')

            missing_uploads.update(table_missing_uploads)

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print('Shard type {shard_type} is missing uploads:'
                  ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print('Shard type {shard_type} is missing {num} uploads'
                  ''.format(num=len(missing_uploads), shard_type=shard_type))

    if not missing_uploads and not instance and success:
        print 'Shard type {shard_type} is backed up'.format(
            shard_type=shard_type)

    return success
Code example #13
def verify_sharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a sharded data set has been backed up to hive

    Args:
    shard_type -  i.e. 'sharddb', etc
    date - The date to search for
    instance - Restrict the search to problems on a single instance

    Returns True for no problems found, False otherwise.
    """
    zk = host_utils.MysqlZookeeper()
    example_shard = environment_specific.SHARDED_DBS_PREFIX_MAP[shard_type][
        'example_shard']
    schema_host = zk.shard_to_instance(example_shard,
                                       repl_type=host_utils.REPLICA_ROLE_SLAVE)
    tables = mysql_backup_csv.mysql_backup_csv(
        schema_host).get_tables_to_backup(
            environment_specific.convert_shard_to_db(example_shard))
    success = verify_csv_schema_upload(
        shard_type, date, schema_host,
        environment_specific.convert_shard_to_db(example_shard), tables)
    if instance:
        host_shard_map = zk.get_host_shard_map()
        (replica_set,
         replica_type) = zk.get_replica_set_from_instance(instance)
        master = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_MASTER)
        shards = host_shard_map[master.__str__()]
    else:
        shards = zk.get_shards_by_shard_type(shard_type)

    pool = multiprocessing.Pool(processes=CSV_CHECK_PROCESSES)
    pool_args = list()
    if not tables:
        raise Exception('No tables will be checked for backups')
    if not shards:
        raise Exception('No shards will be checked for backups')

    for table in tables:
        pool_args.append((table, shard_type, date, shards))
    results = pool.map(get_missing_uploads, pool_args)
    missing_uploads = set()
    for result in results:
        missing_uploads.update(result)

    if missing_uploads or not success:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print('Shard type {shard_type} is missing uploads:'
                  ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print('Shard type {shard_type} is missing {num} uploads'
                  ''.format(num=len(missing_uploads), shard_type=shard_type))
        return False
    else:
        if instance:
            print 'Instance {instance} is backed up'.format(instance=instance)
        else:
            # we have checked all shards, all are good, create success files
            boto_conn = boto.connect_s3()
            bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                          validate=False)
            for table in tables:
                (_, _,
                 success_path) = environment_specific.get_csv_backup_paths(
                     date,
                     environment_specific.convert_shard_to_db(example_shard),
                     table, shard_type)
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')
            print 'Shard type {shard_type} is backed up'.format(
                shard_type=shard_type)

        return True
Code example #14
    def mysql_backup_csv_table(self, db, table, tmp_dir_db, conn):
        """ Back up a single table of a single db

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
        conn - a connection to the mysql instance
        """
        proc_id = multiprocessing.current_process().name
        (_, data_path, _) = environment_specific.get_csv_backup_paths(
            self.datestamp, db, table, self.instance.replica_type,
            self.instance.get_zk_replica_set()[0])
        log.debug('{proc_id}: {db}.{table} dump to {path} started'
                  ''.format(proc_id=proc_id,
                            db=db,
                            table=table,
                            path=data_path))
        self.upload_schema(db, table, tmp_dir_db)
        fifo = os.path.join(tmp_dir_db, table)
        procs = dict()
        try:
            # giant try so we can try to clean things up in case of errors
            self.create_fifo(fifo)

            # Start creating processes
            procs['cat'] = subprocess.Popen(['cat', fifo],
                                            stdout=subprocess.PIPE)
            procs['nullescape'] = subprocess.Popen(['nullescape'],
                                                   stdin=procs['cat'].stdout,
                                                   stdout=subprocess.PIPE)
            procs['lzop'] = subprocess.Popen(['lzop'],
                                             stdin=procs['nullescape'].stdout,
                                             stdout=subprocess.PIPE)

            # Start dump query
            return_value = set()
            query_thread = threading.Thread(target=self.run_dump_query,
                                            args=(db, table, fifo, conn,
                                                  procs['cat'], return_value))
            query_thread.daemon = True
            query_thread.start()

            # And run the upload
            safe_uploader.safe_upload(
                precursor_procs=procs,
                stdin=procs['lzop'].stdout,
                bucket=environment_specific.S3_CSV_BUCKET,
                key=data_path,
                check_func=self.check_dump_success,
                check_arg=return_value)
            os.remove(fifo)
            log.debug('{proc_id}: {db}.{table} clean up complete'
                      ''.format(proc_id=proc_id, db=db, table=table))
        except:
            log.debug(
                '{proc_id}: in exception handling for failed table upload'
                ''.format(proc_id=proc_id))

            if os.path.exists(fifo):
                self.cleanup_fifo(fifo)

            safe_uploader.kill_precursor_procs(procs)

            raise
Code example #15
def verify_flexsharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a flexsharded data set has been backed up to hive

    Args:
    shard_type -  i.e. 'commercefeeddb', etc
    date - The date to search for
    instance - Restrict the search to problems on a single instance

    Returns True for no problems found, False otherwise.
    """
    success = True
    replica_sets = set()
    zk = host_utils.MysqlZookeeper()
    if instance:
        replica_sets.add(zk.get_replica_set_from_instance(instance)[0])
    else:
        for replica_set in zk.get_all_mysql_replica_sets():
            if replica_set.startswith(environment_specific.FLEXSHARD_DBS[shard_type]['zk_prefix']):
                replica_sets.add(replica_set)

    schema_host = zk.get_mysql_instance_from_replica_set(
            environment_specific.FLEXSHARD_DBS[shard_type]['example_shard_replica_set'],
            repl_type=host_utils.REPLICA_ROLE_SLAVE)

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False)
    missing_uploads = set()

    for db in mysql_lib.get_dbs(schema_host):
        for table in mysql_backup_csv.mysql_backup_csv(schema_host).get_tables_to_backup(db):
            if not verify_csv_schema_upload(shard_type, date, schema_host, db, [table]):
                success = False
                continue

            table_missing_uploads = set()
            for replica_set in replica_sets:
                chk_instance = zk.get_mysql_instance_from_replica_set(replica_set)
                (_, data_path, success_path) = environment_specific.get_csv_backup_paths(
                                                   date, db, table, chk_instance.replica_type,
                                                   chk_instance.get_zk_replica_set()[0])
                if not bucket.get_key(data_path):
                    table_missing_uploads.add(data_path)
                    success = False

            if not table_missing_uploads and not instance:
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')

            missing_uploads.update(table_missing_uploads)

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print ('Shard type {shard_type} is missing uploads:'
                   ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print ('Shard type {shard_type} is missing {num} uploads'
                   ''.format(num=len(missing_uploads),
                             shard_type=shard_type))

    if not missing_uploads and not instance and success:
        print 'Shard type {shard_type} is backed up'.format(shard_type=shard_type)

    return success
Code example #16
def verify_sharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a sharded data set has been backed up to hive

    Args:
    shard_type -  i.e. 'sharddb', etc
    date - The date to search for
    instance - Restrict the search to problems on a single instance

    Returns True for no problems found, False otherwise.
    """
    zk = host_utils.MysqlZookeeper()
    example_shard = environment_specific.SHARDED_DBS_PREFIX_MAP[shard_type]['example_shard']
    schema_host = zk.shard_to_instance(example_shard, repl_type=host_utils.REPLICA_ROLE_SLAVE)
    tables = mysql_backup_csv.mysql_backup_csv(schema_host).get_tables_to_backup(environment_specific.convert_shard_to_db(example_shard))
    success = verify_csv_schema_upload(shard_type, date, schema_host,
                                       environment_specific.convert_shard_to_db(example_shard), tables)
    if instance:
        host_shard_map = zk.get_host_shard_map()
        (replica_set, replica_type) = zk.get_replica_set_from_instance(instance)
        master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
        shards = host_shard_map[master.__str__()]
    else:
        shards = zk.get_shards_by_shard_type(shard_type)

    pool = multiprocessing.Pool(processes=CSV_CHECK_PROCESSES)
    pool_args = list()
    if not tables:
        raise Exception('No tables will be checked for backups')
    if not shards:
        raise Exception('No shards will be checked for backups')

    for table in tables:
        pool_args.append((table, shard_type, date, shards))
    results = pool.map(get_missing_uploads, pool_args)
    missing_uploads = set()
    for result in results:
        missing_uploads.update(result)

    if missing_uploads or not success:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print ('Shard type {shard_type} is missing uploads:'
                   ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print ('Shard type {shard_type} is missing {num} uploads'
                   ''.format(num=len(missing_uploads),
                             shard_type=shard_type))
        return False
    else:
        if instance:
            print 'Instance {instance} is backed up'.format(instance=instance)
        else:
            # we have checked all shards, all are good, create success files
            boto_conn = boto.connect_s3()
            bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET, validate=False)
            for table in tables:
                (_, _, success_path) = environment_specific.get_csv_backup_paths(date,
                                                                                 environment_specific.convert_shard_to_db(example_shard),
                                                                                 table, shard_type)
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')
            print 'Shard type {shard_type} is backed up'.format(shard_type=shard_type)

        return True