def _s3connection_opts_from_uri(impl):
    """Parse a WALE_S3_ENDPOINT URI into boto S3Connection keyword options.

    'impl' should look like:

        <protocol>+<calling_format>://[user:pass]@<host>[:port]

    A concrete example:

        https+virtualhost://user:pass@localhost:1235

    Returns a dict possibly containing 'is_secure', 'calling_format',
    'host', and 'port'.  Raises UserException for any malformed or
    unsupported component (bad scheme, credentials, path, or query).
    """
    o = urlparse.urlparse(impl, allow_fragments=False)

    if o.scheme is not None:
        proto_match = re.match(
            r'(?P<protocol>http|https)\+'
            r'(?P<format>virtualhost|path|subdomain)', o.scheme)
        if proto_match is None:
            raise UserException(
                msg='WALE_S3_ENDPOINT URI scheme is invalid',
                detail='The scheme defined is ' + repr(o.scheme),
                hint='An example of a valid scheme is https+virtualhost.')

        opts = {}

        if proto_match.group('protocol') == 'http':
            opts['is_secure'] = False
        else:
            # Constrained by prior regexp.  BUG FIX: this was a bare
            # comparison expression (a no-op statement); make the
            # invariant an executable assertion instead.
            assert proto_match.group('protocol') == 'https'
            opts['is_secure'] = True

        f = proto_match.group('format')
        if f == 'virtualhost':
            opts['calling_format'] = connection.VHostCallingFormat()
        elif f == 'path':
            opts['calling_format'] = connection.OrdinaryCallingFormat()
        elif f == 'subdomain':
            opts['calling_format'] = connection.SubdomainCallingFormat()
        else:
            # Constrained by prior regexp.
            assert False

    # Credentials embedded in the endpoint URI are rejected: WAL-E
    # takes AWS credentials via its own configuration channels.
    if o.username is not None or o.password is not None:
        raise UserException(
            msg='WALE_S3_ENDPOINT does not support username or password')

    if o.hostname is not None:
        opts['host'] = o.hostname

    if o.port is not None:
        opts['port'] = o.port

    if o.path:
        raise UserException(
            msg='WALE_S3_ENDPOINT does not support a URI path',
            detail='Path is {0!r}'.format(o.path))

    if o.query:
        raise UserException(
            msg='WALE_S3_ENDPOINT does not support query parameters')

    return opts
def database_verify(self, data_directory):
    """
    User command which finds the most recent database restore
    wale info dir and invokes verification based on that. This
    only works immediately after restore before any database
    recovery of course.
    """
    # Locate the manifests written during the most recent restore;
    # their absence means no restore metadata exists to verify against.
    manifests_dir = manifest.directory(data_directory)
    if not os.path.isdir(manifests_dir):
        raise UserException(
            msg='Did not find a valid WAL-E restore information',
            detail='Expected to find a directory named .wal-e/restore_info'
        )

    logger.info('Verifying database against manifests from {0}'
                ''.format(manifests_dir))

    # The restore spec records how the cluster was laid out at
    # restore time; verification is performed relative to it.
    with open(os.path.join(data_directory, '.wal-e',
                           'restore-spec.json'), 'r') as f:
        spec = json.load(f)

    # Delegate the actual comparison; a falsy result means at least
    # one discrepancy was found (details are logged by the helper).
    if self._database_verify(data_directory, manifests_dir, spec):
        logger.info('Verification against manifests passed')
    else:
        logger.info('Verification against manifests FAILED')
        raise UserException(
            msg='Verification of database failed',
            detail='Check logs for details of discrepancies found')
def __init__(self, member_name, limited_to, requested, *args, **kwargs):
    """Record the oversized member and compose the user-facing message."""
    # Expose the offending member and size constraint for callers.
    self.member_name = member_name
    self.max_size = limited_to
    self.requested = requested

    template = ('There is a file in the postgres database directory that '
                'is larger than %d bytes. If no such file exists, please '
                'report this as a bug. In particular, check %s, which appears '
                'to be %d bytes.')
    UserException.__init__(
        self,
        msg='Attempted to archive a file that is too large.',
        hint=template % (limited_to, member_name, requested),
        *args, **kwargs)
def __init__(self, member_name, limited_to, requested, *args, **kwargs):
    """Record the oversized member and compose the user-facing message."""
    # Expose the offending member and size constraint for callers.
    self.member_name = member_name
    self.max_size = limited_to
    self.requested = requested

    template = ('There is a file in the postgres database directory that '
                'is larger than %d bytes. If no such file exists, please '
                'report this as a bug. In particular, '
                'check %s.')
    UserException.__init__(
        self,
        msg='Attempted to archive a file that is too large.',
        hint=template % (limited_to, member_name),
        *args, **kwargs)
def find_all(self, query):
    """
    A procedure to assist in finding or detailing specific backups

    Currently supports:

    * a backup name (base_number_number)

    * the pseudo-name LATEST, which finds the lexically highest backup
    """
    if re.match(s3_storage.BASE_BACKUP_REGEXP, query) is not None:
        # Exact-name query: yield every backup whose name matches.
        for candidate in iter(self):
            if candidate.name == query:
                yield candidate
    elif query == 'LATEST':
        # Lexically-highest backup, or a single None when no backups
        # exist at all.
        everything = sorted(iter(self))
        if everything:
            yield everything[-1]
        else:
            yield None
    else:
        raise UserException(
            msg='invalid backup query submitted',
            detail='The submitted query operator was "{0}."'.format(query))
def psql_csv_run(sql_command, error_handler=None):
    """
    Runs psql and returns a CSVReader object from the query

    This CSVReader includes header names as the first record in all
    situations.  The output is fully buffered into Python.

    :param sql_command: SQL text to wrap in a COPY ... TO STDOUT query.
    :param error_handler: optional callable given the failed Popen
        object; when provided it is responsible for raising.
    :raises UserException: on non-zero psql exit when no error_handler
        is supplied.
    """
    csv_query = ('COPY ({query}) TO STDOUT WITH CSV HEADER;'
                 .format(query=sql_command))

    psql_proc = popen_nonblock([PSQL_BIN, '-d', 'postgres', '-c', csv_query],
                               stdout=PIPE)
    stdout = psql_proc.communicate()[0]

    if psql_proc.returncode != 0:
        if error_handler is not None:
            error_handler(psql_proc)
        else:
            assert error_handler is None
            raise UserException(
                'could not csv-execute a query successfully via psql',
                # BUG FIX: the template uses the named field {query},
                # so .format must receive query= as a keyword; the
                # original positional call raised KeyError right when
                # the error message was being built.
                'Query was "{query}".'.format(query=sql_command),
                'You may have to set some libpq environment '
                'variables if you are sure the server is running.')

    # Previous code must raise any desired exceptions for non-zero
    # exit codes
    assert psql_proc.returncode == 0

    # Fake enough iterator interface to get a CSV Reader object
    # that works.
    return csv.reader(iter(stdout.strip().split('\n')))
def must_resolve(region):
    """Return the S3 endpoint for an AWS region name, or raise."""
    try:
        return _S3_REGIONS[region]
    except KeyError:
        # Unknown region: surface a configuration error to the user.
        raise UserException(msg='Could not resolve host for AWS_REGION',
                            detail='AWS_REGION is set to "{0}".'
                            .format(region))
def s3_explicit_creds(args):
    """Build S3 Credentials from CLI arguments and the environment.

    The --aws-access-key-id flag takes precedence over
    AWS_ACCESS_KEY_ID; the secret key and optional security token come
    only from the environment.
    """
    access_key = args.aws_access_key_id or os.getenv('AWS_ACCESS_KEY_ID')
    if access_key is None:
        raise UserException(
            msg='AWS Access Key credential is required but not provided',
            hint=(_config_hint_generate('aws-access-key-id', True)))

    secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
    if secret_key is None:
        raise UserException(
            msg='AWS Secret Key credential is required but not provided',
            hint=_config_hint_generate('aws-secret-access-key', False))

    # Import deferred until credentials are known-present, so missing
    # configuration is reported before any blobstore import work.
    from wal_e.blobstore import s3

    return s3.Credentials(access_key, secret_key,
                          os.getenv('AWS_SECURITY_TOKEN'))
def get_credentials(self, access_key=None, secret_key=None,
                    security_token=None, profile_name=None):
    # Pull keys from the instance metadata service when this provider
    # supports it, then insist a secret key was actually obtained.
    #
    # NOTE(review): the access_key/secret_key/security_token/
    # profile_name parameters appear to exist only for signature
    # compatibility with the overridden boto method; they are unused
    # here — confirm against the boto base class.
    if self.MetadataServiceSupport[self.name]:
        self._populate_keys_from_metadata_server()

    if not self._secret_key:
        raise UserException('Could not retrieve secret key from instance '
                            'profile.',
                            hint='Check that your instance has an IAM '
                            'profile or set --aws-access-key-id')
def gs_creds(args):
    """Build Google Storage Credentials from args and the environment.

    When --gs-instance-metadata is set, authentication is delegated to
    the instance metadata service and no explicit key pair is required.
    """
    from wal_e.blobstore import gs

    if args.gs_instance_metadata:
        # Instance metadata supplies auth; pass an empty key pair.
        return gs.Credentials(None, None)

    access_key = args.gs_access_key_id or os.getenv('GS_ACCESS_KEY_ID')
    if access_key is None:
        raise UserException(
            msg='GS Access Key credential is required but not provided',
            hint=(_config_hint_generate('gs-access-key-id', True)))

    secret_key = os.getenv('GS_SECRET_ACCESS_KEY')
    if secret_key is None:
        raise UserException(
            msg='GS Secret Key credential is required but not provided',
            hint=_config_hint_generate('gs-secret-access-key', False))

    return gs.Credentials(access_key, secret_key)
def run_pg_basebackup(cls, user, host, archive_directory):
    """Stream a base backup into archive_directory via pg_basebackup.

    Raises UserException (including the tool's stdout) when
    pg_basebackup exits non-zero.
    """
    argv = [PG_BASEBACKUP_BIN,
            '--write-recovery-conf',
            '--format=plain',
            '-D', archive_directory,
            '--host', host,
            '--username', user,
            '--xlog-method=stream']
    proc = popen_nonblock(argv, stdout=PIPE)
    output = proc.communicate()[0].decode('utf-8')

    if proc.returncode != 0:
        raise UserException("Could not run pg_basebackup: {stdout}"
                            .format(stdout=output))

    assert proc.returncode == 0
    return
def wal_archive(self, wal_path, concurrency=1): """ Uploads a WAL file to S3 or Windows Azure Blob Service This code is intended to typically be called from Postgres's archive_command feature. """ # Upload the segment expressly indicated. It's special # relative to other uploads when parallel wal-push is enabled, # in that it's not desirable to tweak its .ready/.done files # in archive_status. xlog_dir = os.path.dirname(wal_path) segment = WalSegment(wal_path, explicit=True) uploader = WalUploader(self.layout, self.creds, self.gpg_key_id) group = WalTransferGroup(uploader) group.start(segment) # Upload any additional wal segments up to the specified # concurrency by scanning the Postgres archive_status # directory. started = 1 seg_stream = WalSegment.from_ready_archive_status(xlog_dir) while started < concurrency: try: other_segment = next(seg_stream) except StopIteration: break if other_segment.path != wal_path: group.start(other_segment) started += 1 try: # Wait for uploads to finish. group.join() except EnvironmentError as e: if e.errno == errno.ENOENT: print(e) raise UserException( msg='could not find file for wal-push', detail=('The operating system reported: {0} {1}'.format( e.strerror, repr(e.filename)))) raise
def sigv4_check_apply():
    """Enable boto SigV4 signing when AWS_REGION calls for it.

    Insist that one of AWS_REGION or WALE_S3_ENDPOINT is defined.
    AWS_REGION is for authenticating correctly with AWS SigV4.
    WALE_S3_ENDPOINT is for alternate implementations that are
    S3-interface compatible; many or most of these do not support AWS
    SigV4 at all and none are known to require it (they use the
    non-region-demanding SigV2), so the AWS_REGION requirement is
    relaxed in that case.
    """
    region = os.getenv('AWS_REGION')
    endpoint = os.getenv('WALE_S3_ENDPOINT')

    if endpoint:
        # S3-interface-compatible user (e.g. RADOS or Deis): SigV4
        # uptake is poor on those implementations, so leave boto's
        # default signing untouched.
        if region:
            logger.warning(
                msg='WALE_S3_ENDPOINT defined, ignoring AWS_REGION',
                hint='AWS_REGION is only intended for use with AWS S3, '
                'and not interface-compatible use cases supported by '
                'WALE_S3_ENDPOINT')
        return

    if not region:
        raise UserException(
            msg='must define one of AWS_REGION or WALE_S3_ENDPOINT',
            hint=(
                'AWS users will want to set AWS_REGION, those using '
                'alternative S3-compatible systems will want to use '
                'WALE_S3_ENDPOINT.'
            )
        )

    # Normal case for an AWS user: set up SigV4, which can only be
    # enacted globally through boto's configuration.
    if not boto.config.has_option('s3', 'use-sigv4'):
        if not boto.config.has_section('s3'):
            boto.config.add_section('s3')
        boto.config.set('s3', 'use-sigv4', 'True')
def _verify_restore_paths(self, restore_spec):
    """Ensure every tablespace in the spec has a usable symlink.

    A tablespace entry is valid when pg_tblspc/<name> under the base
    prefix is a symlink pointing at a directory; any invalid entries
    trigger a UserException listing them all.
    """
    path_prefix = restore_spec['base_prefix']

    if 'tablespaces' not in restore_spec:
        return

    def _ok(tblspc):
        link = os.path.join(path_prefix, 'pg_tblspc', tblspc)
        return os.path.islink(link) and os.path.isdir(link)

    bad_links = [t for t in restore_spec['tablespaces'] if not _ok(t)]

    if bad_links:
        raise UserException(
            msg='Symlinks for some tablespaces not found or created.',
            detail=('Symlinks for the following tablespaces were not '
                    'found: {spaces}'.format(spaces=', '.join(bad_links))),
            hint=('Ensure all required symlinks are created prior to '
                  'running backup-fetch, or use --blind-restore to '
                  'ignore symlinking. Alternatively supply a restore '
                  'spec to have WAL-E create tablespace symlinks for you'))
def database_s3_fetch(self, pg_cluster_dir, backup_name, pool_size):
    """Fetch and unpack a base backup from S3 into pg_cluster_dir.

    Refuses to run over a live data directory, resolves backup_name
    (or the pseudo-name LATEST) to exactly one backup, then fetches
    tar partitions with pool_size concurrent connections.

    :raises UserException: on a live data directory, no matching
        backup, or an ambiguous query.
    """
    if os.path.exists(os.path.join(pg_cluster_dir, 'postmaster.pid')):
        raise UserException(
            msg='attempting to overwrite a live data directory',
            detail='Found a postmaster.pid lockfile, and aborting',
            hint='Shut down postgres. If there is a stale lockfile, '
            'then remove it after being very sure postgres is not '
            'running.')

    layout = s3_storage.StorageLayout(self.s3_prefix)

    # One S3 connection per concurrent fetcher.
    # FIX: use range/next() (valid on Python 2.6+ and 3) instead of
    # the Python-2-only xrange/.next() — sibling code in this file
    # already uses the portable spellings.
    s3_connections = []
    for i in range(pool_size):
        s3_connections.append(self.new_connection())

    bl = s3_worker.BackupList(s3_connections[0],
                              s3_storage.StorageLayout(self.s3_prefix),
                              detail=False)

    # If there is no query, return an exhaustive list, otherwise
    # find a backup instead.
    backups = list(bl.find_all(backup_name))
    assert len(backups) <= 1
    if len(backups) == 0:
        raise UserException(
            msg='no backups found for fetching',
            detail='No backup matching the query {0} was able to be '
            'located.'.format(backup_name))
    elif len(backups) > 1:
        raise UserException(
            msg='more than one backup found for fetching',
            detail='More than one backup matching the query {0} was able '
            'to be located.'.format(backup_name),
            hint='To list qualifying backups, '
            'try "wal-e backup-list QUERY".')

    # There must be exactly one qualifying backup at this point.
    assert len(backups) == 1
    backup_info = backups[0]
    layout.basebackup_tar_partition_directory(backup_info)

    partition_iter = s3_worker.TarPartitionLister(
        s3_connections[0], layout, backup_info)

    assert len(s3_connections) == pool_size
    fetchers = []
    for i in range(pool_size):
        fetchers.append(s3_worker.BackupFetcher(
            s3_connections[i], layout, backup_info, pg_cluster_dir,
            (self.gpg_key_id is not None)))
    assert len(fetchers) == pool_size

    # Round-robin partitions across the fetcher pool.
    p = gevent.pool.Pool(size=pool_size)
    fetcher_cycle = itertools.cycle(fetchers)
    for part_name in partition_iter:
        p.spawn(
            self._exception_gather_guard(
                next(fetcher_cycle).fetch_partition),
            part_name)

    p.join(raise_error=True)
def configure_backup_cxt(args):
    """Map parsed CLI arguments onto a concrete 'operator.Backup' object.

    Resolves the storage prefix (S3, WABS, or Swift), verifies GPG is
    runnable when a key is configured, reads the backend-specific
    credentials from args/environment, and returns the matching backup
    operator.  Raises UserException on missing configuration and
    UserCritical for an unrecognized store type.
    """
    # Try to find some WAL-E prefix to store data in.
    prefix = (args.s3_prefix or args.wabs_prefix
              or os.getenv('WALE_S3_PREFIX') or os.getenv('WALE_WABS_PREFIX')
              or os.getenv('WALE_SWIFT_PREFIX'))

    if prefix is None:
        raise UserException(
            msg='no storage prefix defined',
            hint=(
                'Either set one of the --wabs-prefix or --s3-prefix options or'
                ' define one of the WALE_WABS_PREFIX, WALE_S3_PREFIX, or '
                'WALE_SWIFT_PREFIX environment variables.'
            )
        )

    store = storage.StorageLayout(prefix)

    # GPG can be optionally layered atop of every backend, so a common
    # code path suffices.
    gpg_key_id = args.gpg_key_id or os.getenv('WALE_GPG_KEY_ID')
    if gpg_key_id is not None:
        external_program_check([GPG_BIN])

    # Define some hint-text generator to help the user with consistent
    # language between storage backends when possible.
    def _opt_env_hint(optname):
        # Hint for settings accepted both as a CLI flag and an env var.
        option = '--' + optname.lower()
        env = optname.replace('-', '_').upper()
        return ('Pass "{0}" or set the environment variable "{1}".'
                .format(option, env))

    def _env_hint(optname):
        # Hint for settings accepted only via the environment.
        env = optname.replace('-', '_').upper()
        return 'Set the environment variable {0}.'.format(env)

    # Enumeration of reading in configuration for all supported
    # backend data stores, yielding value adhering to the
    # 'operator.Backup' protocol.
    if store.is_s3:
        access_key = args.aws_access_key_id or os.getenv('AWS_ACCESS_KEY_ID')
        if access_key is None:
            raise UserException(
                msg='AWS Access Key credential is required but not provided',
                hint=(_opt_env_hint('aws-access-key-id')))

        secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
        if secret_key is None:
            raise UserException(
                msg='AWS Secret Key credential is required but not provided',
                hint=_env_hint('aws-secret-access-key'))

        # The security token is optional (used for temporary STS creds).
        security_token = os.getenv('AWS_SECURITY_TOKEN')

        from wal_e.blobstore import s3
        from wal_e.operator.s3_operator import S3Backup

        creds = s3.Credentials(access_key, secret_key, security_token)
        return S3Backup(store, creds, gpg_key_id)
    elif store.is_wabs:
        account_name = args.wabs_account_name or os.getenv('WABS_ACCOUNT_NAME')
        if account_name is None:
            raise UserException(
                msg='WABS account name is undefined',
                hint=_opt_env_hint('wabs-account-name'))

        access_key = os.getenv('WABS_ACCESS_KEY')
        if access_key is None:
            raise UserException(
                msg='WABS access key credential is required but not provided',
                hint=_env_hint('wabs-access-key'))

        from wal_e.blobstore import wabs
        from wal_e.operator.wabs_operator import WABSBackup

        creds = wabs.Credentials(account_name, access_key)
        return WABSBackup(store, creds, gpg_key_id)
    elif store.is_swift:
        # Swift credentials come entirely from the environment.
        from wal_e.blobstore import swift
        from wal_e.operator.swift_operator import SwiftBackup
        creds = swift.Credentials(
            os.getenv('SWIFT_AUTHURL'),
            os.getenv('SWIFT_USER'),
            os.getenv('SWIFT_PASSWORD'),
            os.getenv('SWIFT_TENANT'),
            os.getenv('SWIFT_REGION'),
            os.getenv('SWIFT_ENDPOINT_TYPE', 'publicURL'),
        )
        return SwiftBackup(store, creds, gpg_key_id)
    else:
        raise UserCritical(
            msg='no unsupported blob stores should get here',
            hint='Report a bug.')
def handler(popen):
    """Translate a failed subprocess into a user-facing error."""
    # Only invoked on failure; a zero exit reaching here is a bug.
    assert popen.returncode != 0
    raise UserException('Could not start hot backup')
# Backblaze B2 blobstore package surface: fail fast with an actionable
# message when the optional python-b2 dependency is missing.
try:
    import b2client

    assert b2client
except ImportError:
    from wal_e.exception import UserException

    # FIX: the hint previously read 'pip install python-b2 with an
    # unbalanced quote and no period; match the sibling blobstore
    # modules' message style.
    raise UserException(
        msg='Backblaze support requires the module "python-b2"',
        hint='Try running "pip install python-b2".')

from wal_e.blobstore.b2.credentials import Credentials
from wal_e.blobstore.b2.utils import (
    do_lzop_get, uri_put_file, uri_get_file, write_and_return_error)

__all__ = [
    'Credentials',
    'do_lzop_get',
    'uri_put_file',
    'uri_get_file',
    'write_and_return_error'
]
def database_backup(self, data_directory, *args, **kwargs):
    """Uploads a PostgreSQL file cluster to S3 or Windows Azure Blob Service

    Mechanism: just wraps _upload_pg_cluster_dir with
    start/stop backup actions with exception handling.

    In particular there is a 'finally' block to stop the backup in
    most situations.
    """
    # Flags drive the finally-block and the sentinel write below;
    # both start pessimistic and flip only on success.
    upload_good = False
    backup_stop_good = False
    while_offline = False
    start_backup_info = None
    if 'while_offline' in kwargs:
        while_offline = kwargs.pop('while_offline')

    try:
        if not while_offline:
            # Online backup: ask the running server to enter backup
            # mode and report its version.
            start_backup_info = PgBackupStatements.run_start_backup()
            version = PgBackupStatements.pg_version()['version']
        else:
            # Offline backup: the server must NOT be running; derive
            # the WAL position and version from pg_controldata.
            if os.path.exists(os.path.join(data_directory,
                                           'postmaster.pid')):
                hint = ('Shut down postgres. '
                        'If there is a stale lockfile, '
                        'then remove it after being very sure postgres '
                        'is not running.')
                raise UserException(
                    msg='while_offline set, but pg looks to be running',
                    detail='Found a postmaster.pid lockfile, and aborting',
                    hint=hint)

            ctrl_data = PgControlDataParser(data_directory)
            start_backup_info = ctrl_data.last_xlog_file_name_and_offset()
            version = ctrl_data.pg_version()

        ret_tuple = self._upload_pg_cluster_dir(
            start_backup_info, data_directory, version=version,
            *args, **kwargs)
        spec, uploaded_to, expanded_size_bytes = ret_tuple
        upload_good = True
    finally:
        if not upload_good:
            logger.warning(
                'blocking on sending WAL segments',
                detail=('The backup was not completed successfully, '
                        'but we have to wait anyway. '
                        'See README: TODO about pg_cancel_backup'))

        if not while_offline:
            stop_backup_info = PgBackupStatements.run_stop_backup()
        else:
            # Offline backup never entered backup mode; the "stop"
            # info is the same controldata-derived position.
            stop_backup_info = start_backup_info
        backup_stop_good = True

    # XXX: Ugly, this is more of a 'worker' task because it might
    # involve retries and error messages, something that is not
    # treated by the "operator" category of modules.  So
    # basically, if this small upload fails, the whole upload
    # fails!
    if upload_good and backup_stop_good:
        # Try to write a sentinel file to the cluster backup
        # directory that indicates that the base backup upload has
        # definitely run its course and also communicates what WAL
        # segments are needed to get to consistency.
        sentinel_content = json.dumps({
            'wal_segment_backup_stop':
                stop_backup_info['file_name'],
            'wal_segment_offset_backup_stop':
                stop_backup_info['file_offset'],
            'expanded_size_bytes': expanded_size_bytes,
            'spec': spec
        })

        # XXX: should use the storage operators.
        #
        # XXX: distinguish sentinels by *PREFIX* not suffix,
        # which makes searching harder.  (For the next version
        # bump).
        uri_put_file(self.creds,
                     uploaded_to + '_backup_stop_sentinel.json',
                     BytesIO(sentinel_content.encode("utf8")),
                     content_type='application/json')
    else:
        # NB: Other exceptions should be raised before this that
        # have more informative results, it is intended that this
        # exception never will get raised.
        raise UserCritical('could not complete backup process')
def configure_backup_cxt(args):
    """Map parsed CLI arguments onto a concrete 'operator.Backup' object.

    Resolves the storage prefix (S3, WABS, or Swift), verifies GPG is
    runnable when a key is configured, reads the backend-specific
    credentials from args/environment, and returns the matching backup
    operator.  Raises UserException on missing configuration and
    UserCritical for an unrecognized store type.
    """
    # Try to find some WAL-E prefix to store data in.
    prefix = (args.s3_prefix or args.wabs_prefix
              or os.getenv('WALE_S3_PREFIX') or os.getenv('WALE_WABS_PREFIX')
              or os.getenv('WALE_SWIFT_PREFIX'))

    if prefix is None:
        raise UserException(
            msg='no storage prefix defined',
            hint=(
                'Either set one of the --wabs-prefix or --s3-prefix options or'
                ' define one of the WALE_WABS_PREFIX, WALE_S3_PREFIX, or '
                'WALE_SWIFT_PREFIX environment variables.'
            )
        )

    store = storage.StorageLayout(prefix)

    # GPG can be optionally layered atop of every backend, so a common
    # code path suffices.
    gpg_key_id = args.gpg_key_id or os.getenv('WALE_GPG_KEY_ID')
    if gpg_key_id is not None:
        external_program_check([GPG_BIN])

    # Enumeration of reading in configuration for all supported
    # backend data stores, yielding value adhering to the
    # 'operator.Backup' protocol.
    if store.is_s3:
        # Instance-profile credentials take precedence when requested.
        if args.aws_instance_profile:
            creds = s3_instance_profile(args)
        else:
            creds = s3_explicit_creds(args)

        from wal_e.operator import s3_operator

        return s3_operator.S3Backup(store, creds, gpg_key_id)
    elif store.is_wabs:
        account_name = args.wabs_account_name or os.getenv('WABS_ACCOUNT_NAME')
        if account_name is None:
            raise UserException(
                msg='WABS account name is undefined',
                hint=_config_hint_generate('wabs-account-name', True))

        access_key = os.getenv('WABS_ACCESS_KEY')
        if access_key is None:
            raise UserException(
                msg='WABS access key credential is required but not provided',
                hint=_config_hint_generate('wabs-access-key', False))

        from wal_e.blobstore import wabs
        from wal_e.operator.wabs_operator import WABSBackup

        creds = wabs.Credentials(account_name, access_key)
        return WABSBackup(store, creds, gpg_key_id)
    elif store.is_swift:
        # Swift credentials come entirely from the environment.
        from wal_e.blobstore import swift
        from wal_e.operator.swift_operator import SwiftBackup
        creds = swift.Credentials(
            os.getenv('SWIFT_AUTHURL'),
            os.getenv('SWIFT_USER'),
            os.getenv('SWIFT_PASSWORD'),
            os.getenv('SWIFT_TENANT'),
            os.getenv('SWIFT_REGION'),
            os.getenv('SWIFT_ENDPOINT_TYPE', 'publicURL'),
        )
        return SwiftBackup(store, creds, gpg_key_id)
    else:
        raise UserCritical(
            msg='no unsupported blob stores should get here',
            hint='Report a bug.')
try: import boto assert boto except ImportError: from wal_e.exception import UserException raise UserException( msg='AWS support requires module "boto"', hint='Try running "pip install boto".') from wal_e.blobstore.s3.s3_credentials import Credentials from wal_e.blobstore.s3.s3_credentials import InstanceProfileCredentials from wal_e.blobstore.s3.s3_util import do_lzop_get from wal_e.blobstore.s3.s3_util import sigv4_check_apply from wal_e.blobstore.s3.s3_util import uri_get_file from wal_e.blobstore.s3.s3_util import uri_put_file from wal_e.blobstore.s3.s3_util import write_and_return_error __all__ = [ 'Credentials', 'InstanceProfileCredentials', 'do_lzop_get', 'sigv4_check_apply', 'uri_put_file', 'uri_get_file', 'write_and_return_error', ]
try: import azure assert azure except ImportError: from wal_e.exception import UserException raise UserException( msg='wabs support requires module "azure"', hint='Try running "pip install azure".') from wal_e.blobstore.wabs.shim import BlobService from wal_e.blobstore.wabs.wabs_credentials import Credentials from wal_e.blobstore.wabs.wabs_util import do_lzop_get from wal_e.blobstore.wabs.wabs_util import uri_get_file from wal_e.blobstore.wabs.wabs_util import uri_put_file from wal_e.blobstore.wabs.wabs_util import write_and_return_error __all__ = [ 'BlobService', 'Credentials', 'do_lzop_get', 'uri_get_file', 'uri_put_file', 'write_and_return_error', ]
def database_fetch(self, pg_cluster_dir, backup_name, blind_restore,
                   restore_spec, pool_size):
    """Fetch and unpack a base backup into pg_cluster_dir.

    Refuses to run over a live data directory, resolves backup_name
    (or LATEST) to exactly one backup, applies an optional restore
    specification (a JSON file path, or the pseudo-name 'SOURCE' to
    reuse the backup's own spec), optionally verifies tablespace
    symlinks, then fetches tar partitions with pool_size concurrent
    connections.

    :raises UserException: on a live data directory, missing or
        ambiguous backup, or a nonexistent restore-spec file.
    """
    if os.path.exists(os.path.join(pg_cluster_dir, 'postmaster.pid')):
        hint = ('Shut down postgres. If there is a stale lockfile, '
                'then remove it after being very sure postgres is not '
                'running.')
        raise UserException(
            msg='attempting to overwrite a live data directory',
            detail='Found a postmaster.pid lockfile, and aborting',
            hint=hint)

    bl = self._backup_list(False)
    backups = list(bl.find_all(backup_name))

    assert len(backups) <= 1
    if len(backups) == 0:
        raise UserException(
            msg='no backups found for fetching',
            detail=('No backup matching the query {0} '
                    'was able to be located.'.format(backup_name)))
    elif len(backups) > 1:
        raise UserException(
            msg='more than one backup found for fetching',
            detail=('More than one backup matching the query {0} was able '
                    'to be located.'.format(backup_name)),
            hint='To list qualifying backups, '
            'try "wal-e backup-list QUERY".')

    # There must be exactly one qualifying backup at this point.
    assert len(backups) == 1
    assert backups[0] is not None
    backup_info = backups[0]
    backup_info.load_detail(self.new_connection())
    self.layout.basebackup_tar_partition_directory(backup_info)

    if restore_spec is not None:
        if restore_spec != 'SOURCE':
            if not os.path.isfile(restore_spec):
                raise UserException(
                    msg='Restore specification does not exist',
                    # BUG FIX: the original used '%s'.format(...),
                    # which emitted the literal '%s' and dropped the
                    # path; use a str.format placeholder.
                    detail='File not found: {0}'.format(restore_spec),
                    hint=('Provide valid json-formatted restoration '
                          'specification, or pseudo-name "SOURCE" to '
                          'restore using the specification from the '
                          'backup progenitor.'))

            with open(restore_spec, 'r') as fs:
                spec = json.load(fs)

            backup_info.spec.update(spec)

        # Default the base prefix to the target directory when the
        # spec does not supply one, then create the symlink layout.
        if 'base_prefix' not in backup_info.spec \
                or not backup_info.spec['base_prefix']:
            backup_info.spec['base_prefix'] = pg_cluster_dir

        self._build_restore_paths(backup_info.spec)
    else:
        # If the user hasn't passed in a restoration specification
        # use pg_cluster_dir as the restore prefix
        backup_info.spec['base_prefix'] = pg_cluster_dir

    if not blind_restore:
        self._verify_restore_paths(backup_info.spec)

    # One blobstore connection per concurrent fetcher.
    connections = []
    for i in range(pool_size):
        connections.append(self.new_connection())

    partition_iter = self.worker.TarPartitionLister(
        connections[0], self.layout, backup_info)

    assert len(connections) == pool_size
    fetchers = []
    for i in range(pool_size):
        fetchers.append(
            self.worker.BackupFetcher(connections[i], self.layout,
                                      backup_info,
                                      backup_info.spec['base_prefix'],
                                      (self.gpg_key_id is not None)))
    assert len(fetchers) == pool_size

    # Round-robin partitions across the fetcher pool.
    p = gevent.pool.Pool(size=pool_size)
    fetcher_cycle = itertools.cycle(fetchers)
    for part_name in partition_iter:
        p.spawn(
            self._exception_gather_guard(
                next(fetcher_cycle).fetch_partition),
            part_name)

    p.join(raise_error=True)
# Google Cloud Storage blobstore package surface: fail fast with an
# actionable message when the optional dependency is missing.
try:
    import google.cloud

    assert google.cloud
except ImportError:
    from wal_e.exception import UserException

    # FIX: the hint previously read 'pip install google-cloud-storage
    # with an unbalanced quote and no period, and the msg carried a
    # stray trailing space; match the sibling blobstore modules.
    raise UserException(
        msg='Google support requires the module "google-cloud-storage"',
        hint='Try running "pip install google-cloud-storage".')

from wal_e.blobstore.gs.credentials import Credentials
from wal_e.blobstore.gs.utils import (do_lzop_get, uri_put_file,
                                      uri_get_file, write_and_return_error)

__all__ = [
    'Credentials',
    'do_lzop_get',
    'uri_put_file',
    'uri_get_file',
    'write_and_return_error'
]
def configure_backup_cxt(args):
    """Map parsed CLI arguments onto a concrete 'operator.Backup' object.

    Resolves the storage prefix (file, GS, S3, Swift, or WABS),
    verifies GPG is runnable when a key is configured, reads the
    backend-specific credentials from args/environment, and returns
    the matching backup operator.  Raises UserException on missing
    configuration and UserCritical for an unrecognized store type.
    """
    # Try to find some WAL-E prefix to store data in.
    prefix = (args.file_prefix
              or args.gs_prefix
              or args.s3_prefix
              or args.wabs_prefix
              or os.getenv('WALE_FILE_PREFIX')
              or os.getenv('WALE_GS_PREFIX')
              or os.getenv('WALE_S3_PREFIX')
              or os.getenv('WALE_SWIFT_PREFIX')
              or os.getenv('WALE_WABS_PREFIX'))

    if prefix is None:
        raise UserException(msg='no storage prefix defined',
                            hint=('Either set one of the'
                                  ' --file-prefix,'
                                  ' --gs-prefix,'
                                  ' --s3-prefix or'
                                  ' --wabs-prefix options'
                                  ' or define one of the'
                                  ' WALE_FILE_PREFIX,'
                                  ' WALE_GS_PREFIX,'
                                  ' WALE_S3_PREFIX,'
                                  ' WALE_SWIFT_PREFIX or'
                                  ' WALE_WABS_PREFIX,'
                                  ' environment variables.'))

    store = storage.StorageLayout(prefix)

    # GPG can be optionally layered atop of every backend, so a common
    # code path suffices.
    gpg_key_id = args.gpg_key_id or os.getenv('WALE_GPG_KEY_ID')
    if gpg_key_id is not None:
        external_program_check([GPG_BIN])

    # Enumeration of reading in configuration for all supported
    # backend data stores, yielding value adhering to the
    # 'operator.Backup' protocol.
    if store.is_s3:
        # Instance-profile credentials may be requested by flag or env.
        use_instance_profile = args.aws_instance_profile or \
            parse_boolean_envvar(os.getenv('AWS_INSTANCE_PROFILE'))
        if use_instance_profile:
            creds = s3_instance_profile()
        else:
            creds = s3_explicit_creds(args)

        from wal_e.blobstore import s3

        # SigV4 configuration must happen before any S3 connection.
        s3.sigv4_check_apply()

        from wal_e.operator import s3_operator

        return s3_operator.S3Backup(store, creds, gpg_key_id)
    elif store.is_wabs:
        account_name = args.wabs_account_name or os.getenv('WABS_ACCOUNT_NAME')
        if account_name is None:
            raise UserException(msg='WABS account name is undefined',
                                hint=_config_hint_generate(
                                    'wabs-account-name', True))

        # Either a shared access key or a SAS token is acceptable.
        access_key = os.getenv('WABS_ACCESS_KEY')
        access_token = os.getenv('WABS_SAS_TOKEN')
        if not (access_key or access_token):
            raise UserException(
                msg='WABS access credentials is required but not provided',
                hint=('Define one of the WABS_ACCESS_KEY or '
                      'WABS_SAS_TOKEN environment variables.'))

        from wal_e.blobstore import wabs
        from wal_e.operator.wabs_operator import WABSBackup

        creds = wabs.Credentials(account_name, access_key, access_token)

        return WABSBackup(store, creds, gpg_key_id)
    elif store.is_swift:
        # Swift credentials come entirely from the environment.
        from wal_e.blobstore import swift
        from wal_e.operator.swift_operator import SwiftBackup
        creds = swift.Credentials(
            os.getenv('SWIFT_AUTHURL'),
            os.getenv('SWIFT_USER'),
            os.getenv('SWIFT_PASSWORD'),
            os.getenv('SWIFT_TENANT'),
            os.getenv('SWIFT_REGION'),
            os.getenv('SWIFT_ENDPOINT_TYPE', 'publicURL'),
            os.getenv('SWIFT_AUTH_VERSION', '2'),
            os.getenv('SWIFT_DOMAIN_ID'),
            os.getenv('SWIFT_DOMAIN_NAME'),
            os.getenv('SWIFT_TENANT_ID'),
            os.getenv('SWIFT_USER_ID'),
            os.getenv('SWIFT_USER_DOMAIN_ID'),
            os.getenv('SWIFT_USER_DOMAIN_NAME'),
            os.getenv('SWIFT_PROJECT_ID'),
            os.getenv('SWIFT_PROJECT_NAME'),
            os.getenv('SWIFT_PROJECT_DOMAIN_ID'),
            os.getenv('SWIFT_PROJECT_DOMAIN_NAME'),
        )
        return SwiftBackup(store, creds, gpg_key_id)
    elif store.is_gs:
        # GS auth is handled by the google-cloud library itself.
        from wal_e.operator.gs_operator import GSBackup
        return GSBackup(store, gpg_key_id)
    elif store.is_file:
        from wal_e.blobstore import file
        from wal_e.operator.file_operator import FileBackup
        creds = file.Credentials()
        return FileBackup(store, creds, gpg_key_id)
    else:
        raise UserCritical(msg='no unsupported blob stores should get here',
                           hint='Report a bug.')
def external_program_check(to_check=frozenset([PSQL_BIN, LZOP_BIN, PV_BIN])):
    """
    Validates the existence and basic working-ness of other programs

    Implemented because it is easy to get confusing error output when
    one does not install a dependency because of the fork-worker model
    that is both necessary for throughput and makes more obscure the
    cause of failures.  This is intended to be a time and frustration
    saving measure.  This problem has confused The Author in practice
    when switching rapidly between machines.

    """
    could_not_run = []
    error_msgs = []

    def psql_err_handler(popen):
        # psql-specific failure: report it as a message rather than
        # trying to run psql further.
        assert popen.returncode != 0
        error_msgs.append(
            textwrap.fill('Could not get a connection to the database: '
                          'note that superuser access is required'))

        # Bogus error message that is re-caught and re-raised
        raise EnvironmentError('INTERNAL: Had problems running psql '
                               'from external_program_check')

    with open(os.devnull, 'wb') as nullf:
        for program in to_check:
            try:
                if program is PSQL_BIN:
                    # psql is exercised with a real trivial query so a
                    # missing/unreachable server is detected too.
                    psql_csv_run('SELECT 1', error_handler=psql_err_handler)
                else:
                    if program is PV_BIN:
                        extra_args = ['--quiet']
                    else:
                        extra_args = []

                    proc = popen_sp([program] + extra_args,
                                    stdout=nullf, stderr=nullf,
                                    stdin=subprocess.PIPE)

                    # Close stdin for processes that default to
                    # reading from the pipe; the programs WAL-E uses
                    # of this kind will terminate in this case.
                    proc.stdin.close()
                    proc.wait()
            except EnvironmentError:
                could_not_run.append(program)

    if could_not_run:
        error_msgs.append(
            'Could not run the following programs, are they installed? ' +
            ', '.join(could_not_run))

    if error_msgs:
        raise UserException(
            'could not run one or more external programs WAL-E depends upon',
            '\n'.join(error_msgs))

    return None
# Google Cloud (legacy gcloud) blobstore package surface: fail fast
# with an actionable message when the optional dependency is missing.
try:
    import gcloud

    assert gcloud
except ImportError:
    from wal_e.exception import UserException

    # FIX: the hint previously read 'pip install gcloud with an
    # unbalanced quote and no period; match the sibling blobstore
    # modules' message style.
    raise UserException(msg='Google support requires the module "gcloud"',
                        hint='Try running "pip install gcloud".')

from wal_e.blobstore.gs.credentials import Credentials
from wal_e.blobstore.gs.utils import (do_lzop_get, uri_put_file,
                                      uri_get_file, write_and_return_error)

__all__ = [
    'Credentials',
    'do_lzop_get',
    'uri_put_file',
    'uri_get_file',
    'write_and_return_error'
]