def verify_csv_schema_upload(shard_type, date, schema_host, schema_db,
                             schema_upload_path_raw):
    """ Confirm that schema files are uploaded

    Args:
    shard_type - In this case, a hostname or shard type (generally
                 one in the same)
    date - The date to search for
    schema_host - A for to examine to find which tables should exist
    schema_db - Which db to inxpect on schema_host
    schema_upload_path_raw - A string that can be format'ed in order to create
                             a S3 key path

    Returns True for no problems found, False otherwise.
    """
    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    tables = mysql_lib.get_tables(schema_host,
                                  environment_specific.convert_shard_to_db(schema_db),
                                  skip_views=True)
    return_status = True
    for table in tables:
        path = schema_upload_path_raw.format(table=table,
                                             hostname_prefix=shard_type,
                                             date=date,
                                             db_name=schema_db)
        if not bucket.get_key(path):
            print 'Expected key {key} is missing'.format(key=path)
            return_status = False
    return return_status, tables
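# Usage sketch, not from the original source: the path template and the
# shard/date values below are hypothetical, shown only to illustrate the
# placeholders that verify_csv_schema_upload format()s into each S3 key.
def example_verify_schema_upload(schema_host):
    example_path = '{hostname_prefix}/schema/{date}/{db_name}/{table}.sql'
    (ok, tables) = verify_csv_schema_upload('sharddb', '2016-01-01',
                                            schema_host, 'sharddb0001',
                                            example_path)
    if not ok:
        print 'One or more schema files are missing from S3'
    return tables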
    def upload_schema(self, db, table, tmp_dir_db):
        """ Upload the schema of a table to s3

        Args:
        db - the db to be backed up
        table - the table to be backed up
        tmp_dir_db - temporary storage used for all tables in the db
        """
        if self.instance.replica_type in environment_specific.SHARDED_DBS_PREFIX_MAP:
            if db != environment_specific.convert_shard_to_db(
                    environment_specific.SHARDED_DBS_PREFIX_MAP[
                        self.instance.replica_type]['example_schema']):
                return
            s3_path_raw = PATH_DAILY_BACKUP_SHARDED_SCHEMA
        else:
            s3_path_raw = PATH_DAILY_BACKUP_NONSHARDED_SCHEMA

        s3_path = s3_path_raw.format(
            table=table,
            hostname_prefix=self.instance.replica_type,
            date=self.datestamp,
            db_name=db)
        create_stm = mysql_lib.show_create_table(self.instance, db, table)
        log.debug('{proc_id}: Uploading schema to {s3_path}'
                  ''.format(s3_path=s3_path,
                            proc_id=multiprocessing.current_process().name))
        boto_conn = boto.connect_s3()
        bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                      validate=False)
        key = bucket.new_key(s3_path)
        key.set_contents_from_string(create_stm)
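# The PATH_DAILY_BACKUP_*_SCHEMA templates used above are defined elsewhere
# in the source and are not shown in this listing. Hypothetical shapes,
# consistent with the format() fields upload_schema supplies:
PATH_DAILY_BACKUP_SHARDED_SCHEMA = (
    '{hostname_prefix}/schema/{date}/{db_name}/{table}.sql')
PATH_DAILY_BACKUP_NONSHARDED_SCHEMA = (
    '{hostname_prefix}/{db_name}/schema/{date}/{table}.sql')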
def get_missing_uploads(args):
    """ Check to see if all backups are present

    Args: A tuple which can be expanded to:
    table - table name
    shard_type - sharddb, etc.
    date - the date to check
    shards - a set of shards

    Returns: a set of expected S3 keys that are missing
    """
    (table, shard_type, date, shards) = args
    expected_s3_keys = set()
    prefix = None

    for shard in shards:
        (_, data_path, _) = environment_specific.get_csv_backup_paths(
            date, environment_specific.convert_shard_to_db(shard), table,
            shard_type)
        expected_s3_keys.add(data_path)
        if not prefix:
            prefix = os.path.dirname(data_path)

    boto_conn = boto.connect_s3()
    bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                  validate=False)
    uploaded_keys = set()
    for key in bucket.list(prefix=prefix):
        uploaded_keys.add(key.name)

    missing_uploads = expected_s3_keys.difference(uploaded_keys)

    for entry in copy.copy(missing_uploads):
        # the list API occasionally has issues, so we recheck any missing
        # entries. If any are actually missing, we quit checking because
        # there is definitely work that needs to be done
        if bucket.get_key(entry):
            print 'List method erroneously did not return data for key: {entry}'.format(
                entry=entry)
            missing_uploads.discard(entry)
        else:
            return missing_uploads

    return missing_uploads
def verify_sharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a sharded data set has been backed up to hive

    Args:
    shard_type -  i.e. 'sharddb', etc
    date - The date to search for
    instance - Restrict the search to problem on a single instnace

    Returns True for no problems found, False otherwise.
    """
    zk = host_utils.MysqlZookeeper()
    example_shard = environment_specific.SHARDED_DBS_PREFIX_MAP[shard_type][
        'example_shard']
    schema_host = zk.shard_to_instance(example_shard,
                                       repl_type=host_utils.REPLICA_ROLE_SLAVE)
    tables = mysql_backup_csv.mysql_backup_csv(
        schema_host).get_tables_to_backup(
            environment_specific.convert_shard_to_db(example_shard))
    success = verify_csv_schema_upload(
        shard_type, date, schema_host,
        environment_specific.convert_shard_to_db(example_shard), tables)
    if instance:
        host_shard_map = zk.get_host_shard_map()
        (replica_set,
         replica_type) = zk.get_replica_set_from_instance(instance)
        master = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_MASTER)
        shards = host_shard_map[str(master)]
    else:
        shards = zk.get_shards_by_shard_type(shard_type)

    pool = multiprocessing.Pool(processes=CSV_CHECK_PROCESSES)
    pool_args = list()
    if not tables:
        raise Exception('No tables will be checked for backups')
    if not shards:
        raise Exception('No shards will be checked for backups')

    for table in tables:
        pool_args.append((table, shard_type, date, shards))
    results = pool.map(get_missing_uploads, pool_args)
    missing_uploads = set()
    for result in results:
        missing_uploads.update(result)

    if missing_uploads or not success:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print('Shard type {shard_type} is missing uploads:'
                  ''.format(shard_type=shard_type))
            pprint.pprint(missing_uploads)
        else:
            print('Shard type {shard_type} is missing {num} uploads'
                  ''.format(num=len(missing_uploads), shard_type=shard_type))
        return False
    else:
        if instance:
            print 'Instance {instance} is backed up'.format(instance=instance)
        else:
            # we have checked all shards, all are good, create success files
            boto_conn = boto.connect_s3()
            bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                          validate=False)
            for table in tables:
                (_, _,
                 success_path) = environment_specific.get_csv_backup_paths(
                     date,
                     environment_specific.convert_shard_to_db(example_shard),
                     table, shard_type)
                if not bucket.get_key(success_path):
                    print 'Creating success key {key}'.format(key=success_path)
                    key = bucket.new_key(success_path)
                    key.set_contents_from_string('')
            print 'Shard type {shard_type} is backed up'.format(
                shard_type=shard_type)

        return True
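# CSV_CHECK_PROCESSES and MISSING_BACKUP_VERBOSE_LIMIT are module-level
# constants that this listing does not include; hypothetical values of the
# right type:
CSV_CHECK_PROCESSES = 8            # assumed: parallel S3 check workers
MISSING_BACKUP_VERBOSE_LIMIT = 20  # assumed: print keys only below this count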
Example #8
def verify_sharded_csv_backup(shard_type, date, instance=None):
    """ Verify that a sharded data set has been backed up to hive

    Args:
    shard_type -  i.e. 'sharddb', etc
    date - The date to search for
    instance - Restrict the search to problem on a single instnace

    Returns True for no problems found, False otherwise.
    """

    if instance:
        # if we are running the check on a specific instance, add some
        # additional logic to avoid false alarms while the backup process
        # is still running.
        if (date == (datetime.datetime.utcnow().date() -
                     datetime.timedelta(days=1)).strftime("%Y-%m-%d")):
            # The date being verified is the most recent backup (yesterday,
            # UTC); give the backup CSV_STARTUP minutes to start before
            # checking anything.
            if datetime.datetime.utcnow().time() < CSV_STARTUP:
                print 'Backup startup time has not yet passed'
                return True

            if datetime.datetime.utcnow().time() < CSV_COMPLETION_TIME:
                # Until CSV_COMPLETION_TIME it is good enough to check
                # whether backups are running. If they are, everything
                # is OK. If they are not, we do all the normal checks.
                if csv_backups_running(instance):
                    print 'Backup running on {i}'.format(i=instance)
                    return True

    schema_db = environment_specific.SHARDED_DBS_PREFIX_MAP[shard_type][
        'example_schema']
    zk = host_utils.MysqlZookeeper()
    schema_host = zk.shard_to_instance(schema_db,
                                       repl_type=host_utils.REPLICA_ROLE_SLAVE)
    (success, tables) = \
        verify_csv_schema_upload(shard_type, date, schema_host, schema_db,
                                 mysql_backup_csv.PATH_DAILY_BACKUP_SHARDED_SCHEMA)

    if not success:
        # problem with schema, don't bother verifying data
        return False

    if instance:
        host_shard_map = zk.get_host_shard_map()
        (replica_set,
         replica_type) = zk.get_replica_set_from_instance(instance)
        master = zk.get_mysql_instance_from_replica_set(
            replica_set, host_utils.REPLICA_ROLE_MASTER)
        shards = host_shard_map[str(master)]
    else:
        shards = zk.get_shards_by_shard_type(shard_type)

    missing_uploads = set()
    for table in tables:
        expected_s3_keys = set()
        prefix = None
        for shard in shards:
            key = mysql_backup_csv.PATH_DAILY_BACKUP.format(
                table=table,
                hostname_prefix=shard_type,
                date=date,
                db_name=environment_specific.convert_shard_to_db(shard))
            expected_s3_keys.add(key)
            if not prefix:
                prefix = os.path.dirname(key)

        boto_conn = boto.connect_s3()
        bucket = boto_conn.get_bucket(environment_specific.S3_CSV_BUCKET,
                                      validate=False)
        uploaded_keys = set()
        for key in bucket.list(prefix=prefix):
            uploaded_keys.add(key.name)
        missing = expected_s3_keys.difference(uploaded_keys)
        if missing and table not in environment_specific.IGNORABLE_MISSING_TABLES:
            missing_uploads.update(missing)

    if missing_uploads:
        if len(missing_uploads) < MISSING_BACKUP_VERBOSE_LIMIT:
            print 'Missing uploads: {uploads}'.format(uploads=missing_uploads)
        else:
            print 'Missing {num} uploads'.format(num=len(missing_uploads))
        return False
    else:
        if not instance:
            # we have checked all shards, all are good, create success files
            for table in tables:
                key_name = mysql_backup_csv.PATH_DAILY_SUCCESS.format(
                    table=table, hostname_prefix=shard_type, date=date)
                if bucket.get_key(key_name):
                    print 'Key already exists {key}'.format(key=key_name)
                else:
                    print 'Creating success key {key}'.format(key=key_name)
                    key = bucket.new_key(key_name)
                    key.set_contents_from_string('')

        return True
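# CSV_STARTUP and CSV_COMPLETION_TIME are compared against
# datetime.datetime.utcnow().time() above, so they must be datetime.time
# values; the exact cutoffs here are hypothetical.
import datetime
CSV_STARTUP = datetime.time(0, 15)         # assumed: job may start this late
CSV_COMPLETION_TIME = datetime.time(8, 0)  # assumed: uploads done by 08:00 UTC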
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('db',
                        help='What server, shard or replica set to connect to '
                             '(e.g. sharddb021b[:3306], db00003, pbdata03862, '
                             'follower_zendata001002)')
    parser.add_argument('-p',
                        '--privileges',
                        help=''.join(('Default is ', DEFAULT_ROLE)),
                        default=DEFAULT_ROLE,
                        choices=environment_specific.CLI_ROLES.keys())
    parser.add_argument('-l',
                        '--longquery',
                        default=False,
                        action='store_true',
                        help='For standard read or write access, use this '
                             'flag if you expect the query to take more than '
                             '10 seconds.')
    parser.add_argument('-e',
                        '--execute',
                        help='An optional SQL command to run.',
                        default=False)
    args = parser.parse_args()
    zk = host_utils.MysqlZookeeper()
    host = None
    db = ''

    role_modifier = 'default'
    long_query = ''
    if args.longquery:
        role_modifier = 'long'
        long_query = '(long queries enabled)'

    # check if db exists in DNS; if so, the supplied argument will be
    # considered a hostname, otherwise a replica set.
    try:
        socket.gethostbyname(args.db)
        host = host_utils.HostAddr(args.db)
        log.info('{db} appears to be a hostname'.format(db=args.db))
    except Exception:
        log.info('{db} appears not to be a hostname'.format(db=args.db))

    # Maybe it is a replica set
    if not host:
        config = zk.get_all_mysql_config()
        if args.db in config:
            master = config[args.db]['master']
            log.info('{db} appears to be a replica set'.format(db=args.db))
            host = host_utils.HostAddr(''.join((master['host'],
                                                ':',
                                                str(master['port']))))
        else:
            log.info('{db} appears not to be a replica set'.format(db=args.db))

    # Perhaps a shard?
    if not host:
        shard_map = zk.get_host_shard_map()
        for master in shard_map:
            if args.db in shard_map[master]:
                log.info('{db} appears to be a shard'.format(db=args.db))
                host = host_utils.HostAddr(master)
                db = environment_specific.convert_shard_to_db(args.db)
                break
        if not host:
            log.info('{db} appears not to be a shard'.format(db=args.db))

    if not host:
        raise Exception('Could not determine what host to connect to')

    log.info('Will connect to {host} with {privileges} '
             'privileges {lq}'.format(host=host,
                                      privileges=args.privileges,
                                      lq=long_query))
    (username, password) = mysql_lib.get_mysql_user_for_role(
        environment_specific.CLI_ROLES[args.privileges][role_modifier])

    if args.execute:
        execute_escaped = args.execute.replace('"', '\\"')
        cmd = MYSQL_CLI_EX.format(host=host.hostname,
                                  port=host.port,
                                  db=db,
                                  user=username,
                                  password=password,
                                  execute=execute_escaped)
    else:
        cmd = MYSQL_CLI.format(host=host.hostname,
                               port=host.port,
                               db=db,
                               user=username,
                               password=password)
    log.info(cmd)
    proc = subprocess.Popen(cmd, shell=True)
    proc.wait()
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('db',
                        help='What server, shard or replica set to connect to '
                             '(e.g. sharddb021b[:3306], db00003, pbdata03862, '
                             'follower_zendata001002)')
    parser.add_argument('-p',
                        '--privileges',
                        help=''.join(('Default is ', DEFAULT_ROLE)),
                        default=DEFAULT_ROLE,
                        choices=environment_specific.CLI_ROLES.keys())
    parser.add_argument('-l',
                        '--longquery',
                        default=False,
                        action='store_true',
                        help='For standard read or write access, use this '
                             'flag if you expect the query to take more than '
                             '10 seconds.')
    parser.add_argument('-e',
                        '--execute',
                        help='An optional SQL command to run.',
                        default=False)
    args = parser.parse_args()
    zk = host_utils.MysqlZookeeper()
    host = None
    db = ''

    role_modifier = 'default'
    long_query = ''
    if args.longquery:
        role_modifier = 'long'
        long_query = '(long queries enabled)'

    # check if db exists in DNS; if so, the supplied argument will be
    # considered a hostname, otherwise a replica set.
    try:
        socket.gethostbyname(args.db)
        host = host_utils.HostAddr(args.db)
        log.info('{db} appears to be a hostname'.format(db=args.db))
    except Exception:
        log.info('{db} appears not to be a hostname'.format(db=args.db))

    # Maybe it is a replica set
    if not host:
        config = zk.get_all_mysql_config()
        if args.db in config:
            master = config[args.db]['master']
            log.info('{db} appears to be a replica set'.format(db=args.db))
            host = host_utils.HostAddr(''.join((master['host'],
                                                ':',
                                                str(master['port']))))
        else:
            log.info('{db} appears not to be a replica set'.format(db=args.db))

    # Perhaps a shard?
    if not host:
        shard_map = zk.get_host_shard_map()
        for master in shard_map:
            if args.db in shard_map[master]:
                log.info('{db} appears to be a shard'.format(db=args.db))
                host = host_utils.HostAddr(master)
                db = environment_specific.convert_shard_to_db(args.db)
                break
        if not host:
            log.info('{db} appears not to be a shard'.format(db=args.db))

    if not host:
        raise Exception('Could not determine what host to connect to')

    log.info('Will connect to {host} with {privileges} '
             'privileges {lq}'.format(host=host,
                                      privileges=args.privileges,
                                      lq=long_query))
    (username, password) = mysql_lib.get_mysql_user_for_role(
        environment_specific.CLI_ROLES[args.privileges][role_modifier])

    # we may or may not know what replica set we're connecting to at
    # this point.
    sql_safe = ''
    try:
        replica_set, _ = zk.get_replica_set_from_instance(host)
    except Exception as e:
        if 'is not in zk' in e.message:
            log.warning('SERVER IS NOT IN ZK!!!')
            replica_set = None
        else:
            raise

    try:
        # do we need a prompt?
        if replica_set in environment_specific.EXTRA_PARANOID_REPLICA_SETS:
            if args.privileges in ['read-write', 'admin']:
                resp = raw_input("You've asked for {priv} access to replica "
                                 "set {rs}.  Are you sure? (Y/N): ".format(
                                    priv=args.privileges,
                                    rs=replica_set))
                if not resp or resp[0] not in ['Y', 'y']:
                    raise Exception('Connection aborted by user!')

        # should we enable safe-updates?
        if replica_set in environment_specific.PARANOID_REPLICA_SETS:
            if args.privileges in ['read-write', 'admin']:
                sql_safe = '--init-command="SET SESSION SQL_SAFE_UPDATES=ON"'

    except Exception as e:
        log.error("Unable to continue: {}".format(e))
        return

    if args.execute:
        execute_escaped = args.execute.replace('"', '\\"')
        cmd = MYSQL_CLI_EX.format(host=host.hostname,
                                  port=host.port,
                                  db=db,
                                  user=username,
                                  password=password,
                                  sql_safe=sql_safe,
                                  execute=execute_escaped)
    else:
        cmd = MYSQL_CLI.format(host=host.hostname,
                               port=host.port,
                               db=db,
                               user=username,
                               password=password,
                               sql_safe=sql_safe)
    log.info(cmd)
    proc = subprocess.Popen(cmd, shell=True)
    proc.wait()
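# Example invocations (hypothetical script and host names, mirroring the
# help text above). The script shells out to the mysql client through the
# MYSQL_CLI/MYSQL_CLI_EX templates, which are defined elsewhere in the
# source:
#   mysql_cli.py sharddb021b:3306               # connect by hostname
#   mysql_cli.py db00003 -p read-write          # replica set, read-write role
#   mysql_cli.py pbdata03862 -e 'SELECT 1'      # shard, run a single statement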