class Hashsum(FileFilter):
    """Add a new file (default: 'hashes.txt') with the hash of all backed-up files."""
    parameters = FileFilter.parameters + [
        Parameter('method', converter=CheckOption(['sha1', 'md5', 'sha256']),
                  help_str='method: sha1, md5 or sha256'),
        Parameter('filename', help_str='index file (default to \'hashes.txt\')'),
    ]
    work_in_place = True

    def __init__(self, name, method='sha1', filename='hashes.txt', **kwargs):
        super(Hashsum, self).__init__(name, **kwargs)
        self.method = method
        self.filename = filename

    def do_restore(self, previous_path, next_path, private_path, allow_in_place=True):
        cmd_str = {'sha1': 'shasum -a 1 -c', 'md5': 'md5sum -c',
                   'sha256': 'shasum -a 256 -c'}[self.method]
        index_path = os.path.abspath(os.path.join(next_path, self.filename))
        cmd = shlex.split(cmd_str) + [index_path]
        self.execute_command(cmd, cwd=next_path)

    def do_backup(self, previous_path, next_path, private_path, allow_in_place=True):
        cmd = {'sha1': 'shasum -a 1 -b', 'md5': 'md5sum -b',
               'sha256': 'shasum -a 256 -b'}[self.method]
        index_path = os.path.abspath(os.path.join(next_path, self.filename))
        fd = codecs.open(os.devnull, 'w', encoding='utf-8')
        if self.can_execute_command(['rm', index_path]):
            fd = codecs.open(index_path, 'w', encoding='utf-8')
        for root, dirnames, filenames in os.walk(next_path):
            for filename in filenames:
                src_path = os.path.abspath(os.path.join(root, filename))
                if src_path == index_path:
                    continue
                hash_obj = getattr(hashlib, self.method)()
                with open(src_path, 'rb') as src_fd:
                    for data in iter(lambda: src_fd.read(16384), b''):
                        hash_obj.update(data)
                if self.can_execute_command('%s %s >> %s' % (cmd, src_path, index_path)):
                    fd.write("%s *%s\n" % (hash_obj.hexdigest(),
                                           os.path.relpath(src_path, next_path)))
        fd.close()
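# Minimal standalone sketch of what Hashsum does at backup time: walk a tree, hash every
# file and write a shasum-compatible index ("<hexdigest> *<relative path>"). The function
# name and the 'hashes.txt' default mirror the filter above; everything else (no dry-run
# handling, no FileFilter plumbing) is a simplification for illustration only.
import hashlib
import os


def write_hash_index(root_dir, method='sha1', index_name='hashes.txt'):
    index_path = os.path.abspath(os.path.join(root_dir, index_name))
    with open(index_path, 'w', encoding='utf-8') as index_fd:
        for root, dirnames, filenames in os.walk(root_dir):
            for filename in filenames:
                src_path = os.path.abspath(os.path.join(root, filename))
                if src_path == index_path:
                    continue
                hash_obj = hashlib.new(method)
                with open(src_path, 'rb') as src_fd:
                    for chunk in iter(lambda: src_fd.read(16384), b''):
                        hash_obj.update(chunk)
                index_fd.write('%s *%s\n' % (hash_obj.hexdigest(),
                                             os.path.relpath(src_path, root_dir)))
    return index_path

# The resulting file can be checked with 'shasum -a 1 -c hashes.txt' (or 'md5sum -c' /
# 'shasum -a 256 -c'), which is exactly what do_restore() runs.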
class PostgresSQL(MySQL):
    """Dump the content of a PostgreSQL database with the pg_dump utility to a filename in the collect point.
    Requires the 'pg_dump' and 'psql' utilities."""
    parameters = MySQL.parameters[:-2] + [
        Parameter('dump_executable', converter=check_executable,
                  help_str='path of the pg_dump executable (default: "pg_dump")'),
        Parameter('restore_executable', converter=check_executable,
                  help_str='path of the psql executable (default: "psql")'),
    ]

    def __init__(self, name, collect_point, port='5432', dump_executable='pg_dump',
                 restore_executable='psql', **kwargs):
        super(PostgresSQL, self).__init__(name, collect_point, port=port,
                                          dump_executable=dump_executable,
                                          restore_executable=restore_executable, **kwargs)

    def get_dump_cmd_list(self):
        command = [self.dump_executable]
        if self.user:
            command += ['--username=%s' % self.user]
        if self.host:
            command += ['--host=%s' % self.host]
        if self.port:
            command += ['--port=%s' % self.port]
        command += [self.database]
        return command

    def get_env(self):
        """Extra environment variables to be passed to shell execution"""
        if self.password:
            return {'PGPASSWORD': self.password}
        return {}
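# Illustration of the pg_dump invocation built by get_dump_cmd_list()/get_env(): the
# password never appears on the command line, it is passed through the PGPASSWORD
# environment variable. Host, user, database and output path below are placeholders.
import os
import subprocess


def dump_postgres(database, user, password, host='localhost', port='5432',
                  output_path='pg_dump.sql'):
    cmd = ['pg_dump', '--username=%s' % user, '--host=%s' % host,
           '--port=%s' % port, database]
    env = os.environ.copy()
    env['PGPASSWORD'] = password
    with open(output_path, 'wb') as fd:
        subprocess.check_call(cmd, stdout=fd, env=env)
    return output_path

# Restoring is symmetric: feed the dump to 'psql' on stdin with the same environment.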
class Point(ParameterizedObject): parameters = ParameterizedObject.parameters + [ Parameter( 'check_out_of_date_backup', 'frequency', converter=get_is_time_elapsed, help_str= 'frequency of backup operations. Can be an integer (number of seconds),\n' '"monthly:d" (at least the d-th day of each month, d = 0..28),\n' '"weekly:d" (the d-th day of each week, d = 0..6),\n' '"weekly" or "daily" (once a week or once a day),\n' '"daily:h" (the h-th hour of each day, h = 0..23)'), ] def __init__(self, name, check_out_of_date_backup=None, **kwargs): super(Point, self).__init__(name, **kwargs) self.check_out_of_date_backup = check_out_of_date_backup or get_is_time_elapsed( None) self.filters = [] # list of `polyarchiv.filters.FileFilter` self.hooks = [] # list of `polyarchiv.hooks.Hook` def add_filter(self, filter_): from polyarchiv.filters import FileFilter assert isinstance(filter_, FileFilter) self.filters.append(filter_) def add_hook(self, hook): from polyarchiv.hooks import Hook assert isinstance(hook, Hook) self.hooks.append(hook) if hook.keep_output and not self.output_temp_fd: self.output_temp_fd = tempfile.TemporaryFile() @property def stderr(self): if self.verbosity >= 3 and not self.output_temp_fd: return None elif self.output_temp_fd: return self.output_temp_fd self.output_temp_fd = open(os.devnull, 'wb') return self.output_temp_fd @property def stdout(self): if self.verbosity >= 3 and not self.output_temp_fd: return None elif self.output_temp_fd: return self.output_temp_fd self.output_temp_fd = open(os.devnull, 'wb') return self.output_temp_fd
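# Illustrative sketch (not polyarchiv's actual get_is_time_elapsed converter) of how a
# frequency specification like the one documented above could be turned into an
# "is the last backup out of date?" predicate. Only the integer and "daily[:h]" forms
# are shown; "weekly[:d]" and "monthly:d" follow the same pattern with larger steps.
import datetime


def is_out_of_date(spec, previous_time, current_time=None):
    current_time = current_time or datetime.datetime.now()
    if previous_time is None:
        return True
    if spec.isdigit():  # plain number of seconds
        return (current_time - previous_time).total_seconds() >= int(spec)
    if spec.startswith('daily'):
        __, __, hour = spec.partition(':')
        due = current_time.replace(hour=int(hour) if hour else 0,
                                   minute=0, second=0, microsecond=0)
        if due > current_time:  # that hour has not been reached yet today
            due -= datetime.timedelta(days=1)
        return previous_time < due  # out of date if no backup since the last due time
    raise ValueError('unsupported frequency: %s' % spec)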
class ArchiveRepository(FileRepository):
    """Create an archive (.tar.gz, .tar.xz or .tar.bz2) with files collected from all sources."""
    parameters = FileRepository.parameters + [
        Parameter('archive_name', converter=check_archive,
                  help_str='Name of the created archive, must end with .tar.gz, '
                           '.tar.bz2 or .tar.xz. Default: "archive.tar.gz"[*]')
    ]

    def __init__(self, name, archive_name='archive.tar.gz', **kwargs):
        super(ArchiveRepository, self).__init__(name=name, **kwargs)
        self.archive_name = archive_name

    def post_source_backup(self):
        super(ArchiveRepository, self).post_source_backup()
        self.ensure_dir(self.private_data_path)
        comp = 'j'
        archive_name = self.format_value(self.archive_name)
        if archive_name.endswith('.tar.gz'):
            comp = 'z'
        elif archive_name.endswith('.tar.xz'):
            comp = 'J'
        file_list = os.listdir(self.import_data_path)
        full_path = os.path.join(self.private_data_path, archive_name)
        if file_list:
            self.execute_command(['tar', '-c%sf' % comp, full_path] + file_list,
                                 cwd=self.import_data_path)
        elif self.can_execute_command(['tar', '-c%sf' % comp, full_path]):
            mode = {'j': 'w:bz2', 'J': 'w:xz', 'z': 'w:gz'}[comp]
            tarfile.open(name=full_path, mode=mode).close()
        if self.can_execute_command(['rm', '-rf', self.import_data_path]):
            shutil.rmtree(self.import_data_path)

    @property
    def private_data_path(self):
        path = os.path.join(self.local_path, 'archives')
        return self.format_value(path)

    def pre_source_restore(self):
        archive_name = self.format_value(self.archive_name)
        full_path = os.path.join(self.private_data_path, archive_name)
        path = self.import_data_path
        if (os.path.isdir(path) and os.listdir(path)) and self.can_execute_command(['rm', '-rf', path]):
            shutil.rmtree(path)
        self.ensure_dir(path)
        self.execute_command(['tar', '-C', path, '-xf', full_path])
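# Minimal sketch of the same archive creation done purely with the tarfile module (the
# class above shells out to 'tar' and only falls back to tarfile to create an empty
# archive). Paths are placeholders.
import os
import tarfile


def make_archive(source_dir, archive_path):
    # pick the compression mode from the archive extension, as post_source_backup() does
    if archive_path.endswith('.tar.gz'):
        mode = 'w:gz'
    elif archive_path.endswith('.tar.xz'):
        mode = 'w:xz'
    elif archive_path.endswith('.tar.bz2'):
        mode = 'w:bz2'
    else:
        raise ValueError('archive name must end with .tar.gz, .tar.bz2 or .tar.xz')
    with tarfile.open(archive_path, mode) as archive:
        for name in sorted(os.listdir(source_dir)):
            archive.add(os.path.join(source_dir, name), arcname=name)
    return archive_path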
class LogHook(Hook): """store PolyArchiv's output to the given path. Be sure to set `keep_output` to `y`.""" parameters = Hook.parameters + [ Parameter('path', required=True, help_str='path of the log file [*]'), ] def __init__(self, name, runner, path=None, **kwargs): super(LogHook, self).__init__(name, runner, **kwargs) self.path = path def call(self, when, cm, collect_point_results, backup_point_results): assert isinstance(cm, FileContentMonitor) path = self.format_value(self.path) with open(path, 'wb') as fd: cm.copy_content(fd, close=False)
class Config(object):
    parameters = [
        Parameter('rsync_executable', converter=check_executable,
                  help_str='full path of the "rsync" executable'),
        Parameter('curl_executable', converter=check_executable,
                  help_str='full path of the "curl" executable'),
        Parameter('git_executable', converter=check_executable,
                  help_str='full path of the "git" executable'),
        Parameter('scp_executable', converter=check_executable,
                  help_str='full path of the "scp" executable'),
        Parameter('ssh_executable', converter=check_executable,
                  help_str='full path of the "ssh" executable'),
        Parameter('tar_executable', converter=check_executable,
                  help_str='full path of the "tar" executable'),
        Parameter('svn_executable', converter=check_executable,
                  help_str='full path of the "svn" executable'),
    ]

    def __init__(self, command_display=True, command_confirm=False, command_execute=True,
                 command_keep_output=False, rsync_executable='rsync', curl_executable='curl',
                 git_executable='git', scp_executable='scp', ssh_executable='ssh',
                 tar_executable='tar', svn_executable='svn'):
        self.command_display = command_display  # display each command before running it
        self.command_confirm = command_confirm  # ask the user to confirm each command
        self.command_execute = command_execute  # actually run commands (if False: 'dry' mode)
        self.command_keep_output = command_keep_output  # display all command outputs on stderr/stdout
        self.rsync_executable = rsync_executable
        self.curl_executable = curl_executable
        self.git_executable = git_executable
        self.scp_executable = scp_executable
        self.ssh_executable = ssh_executable
        self.tar_executable = tar_executable
        self.svn_executable = svn_executable
class SymmetricCrypt(FileFilter): """Encrypt all files with symmetric encryption and a password (using GPG). The only required parameter is the password. Require 'gpg' to be installed and in $PATH. """ parameters = FileFilter.parameters + [ Parameter('gpg_executable', converter=check_executable, help_str='path of the gpg executable (default: "gpg")'), Parameter('password', help_str='password to encrypt data'), ] work_in_place = False def __init__(self, name, password='******', gpg_executable='gpg', **kwargs): super(SymmetricCrypt, self).__init__(name, **kwargs) self.password = password self.gpg_executable = gpg_executable def do_backup(self, previous_path, next_path, private_path, allow_in_place=True): symlinks = True if os.listdir(next_path): if self.can_execute_command(['rm', '-rf', next_path]): shutil.rmtree(next_path) if self.can_execute_command(['mkdir', '-p', next_path]): os.makedirs(next_path) for root, dirnames, filenames in os.walk(previous_path): for src_dirname in dirnames: clear_path = os.path.join(root, src_dirname) crypted_path = os.path.join( next_path, os.path.relpath(clear_path, previous_path)) if self.can_execute_command(['mkdir', '-p', crypted_path]): os.makedirs(crypted_path) shutil.copystat(clear_path, crypted_path) for src_filename in filenames: clear_path = os.path.join(root, src_filename) crypted_path = os.path.join( next_path, os.path.relpath(clear_path, previous_path)) if symlinks and os.path.islink(clear_path): linkto = os.readlink(clear_path) if self.can_execute_command( ['ln', '-s', linkto, crypted_path]): os.symlink(linkto, crypted_path) else: cmd = [ 'gpg', '--passphrase', self.password, '-o', crypted_path, '-c', clear_path ] return_code, __, __ = self.execute_command( cmd, stderr=self.stderr, stdout=self.stdout) if return_code == 0 and os.path.isfile( crypted_path) and os.path.isfile(clear_path): shutil.copystat(clear_path, crypted_path) def do_restore(self, previous_path, next_path, private_path, allow_in_place=True): symlinks = True if os.listdir(previous_path): if self.can_execute_command(['rm', '-rf', previous_path]): shutil.rmtree(previous_path) if self.can_execute_command(['mkdir', '-p', previous_path]): os.makedirs(previous_path) for root, dirnames, filenames in os.walk(next_path): for src_dirname in dirnames: crypted_path = os.path.join(root, src_dirname) clear_path = os.path.join( previous_path, os.path.relpath(crypted_path, next_path)) if self.can_execute_command(['mkdir', '-p', clear_path]): os.makedirs(clear_path) shutil.copystat(crypted_path, clear_path) for src_filename in filenames: crypted_path = os.path.join(root, src_filename) clear_path = os.path.join( previous_path, os.path.relpath(crypted_path, next_path)) if symlinks and os.path.islink(crypted_path): linkto = os.readlink(crypted_path) if self.can_execute_command( ['ln', '-s', linkto, clear_path]): os.symlink(linkto, clear_path) else: cmd = [ 'gpg', '--passphrase', self.password, '-o', clear_path, '--decrypt', crypted_path ] return_code, __, __ = self.execute_command(cmd) if return_code == 0 and os.path.isfile(clear_path): shutil.copystat(crypted_path, clear_path)
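# Reduced sketch of the gpg calls issued by SymmetricCrypt: symmetric encryption of a
# single file at backup time and decryption at restore time. The '--batch --yes' flags
# are added here for non-interactive use and are not passed by the filter above; passing
# '--passphrase' on the command line exposes the password in the process list, exactly
# as in the filter. File names are placeholders.
import subprocess


def gpg_encrypt(clear_path, crypted_path, password):
    subprocess.check_call(['gpg', '--batch', '--yes', '--passphrase', password,
                           '-o', crypted_path, '-c', clear_path])


def gpg_decrypt(crypted_path, clear_path, password):
    subprocess.check_call(['gpg', '--batch', '--yes', '--passphrase', password,
                           '-o', clear_path, '--decrypt', crypted_path])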
class GitRepository(FileRepository): """Create a local git repository. Collect files from all sources and commit them locally. """ parameters = FileRepository.parameters + [ Parameter( 'commit_email', help_str='user email used for signing commits (default: "%s") [*]' % DEFAULT_EMAIL), Parameter( 'commit_name', help_str='user name used for signing commits (default: "%s") [*]' % DEFAULT_USERNAME), Parameter( 'commit_message', help_str= 'commit message (default: "Backup {Y}/{m}/{d} {H}:{M}") [*]'), ] def __init__(self, name, commit_name=DEFAULT_USERNAME, commit_email=DEFAULT_EMAIL, commit_message='Backup {Y}/{m}/{d} {H}:{M}', **kwargs): super(GitRepository, self).__init__(name=name, **kwargs) self.commit_name = commit_name self.commit_email = commit_email self.commit_message = commit_message def post_source_backup(self): super(GitRepository, self).post_source_backup() git_config_path = os.path.join(self.metadata_path, '.gitconfig') if not os.path.isfile(git_config_path): self.execute_command([ self.config.git_executable, 'config', '--global', 'user.email', self.format_value(self.commit_email) ], env={'HOME': self.metadata_path}) self.execute_command([ self.config.git_executable, 'config', '--global', 'user.name', self.format_value(self.commit_name) ], env={'HOME': self.metadata_path}) os.chdir(self.import_data_path) self.execute_command([self.config.git_executable, 'init'], cwd=self.import_data_path) self.execute_command([self.config.git_executable, 'add', '.']) self.execute_command([ self.config.git_executable, 'commit', '-am', self.format_value(self.commit_message) ], ignore_errors=True, env={'HOME': self.metadata_path}) def pre_source_restore(self): os.chdir(self.import_data_path) self.execute_command([self.config.git_executable, 'reset', '--hard'], cwd=self.import_data_path, env={'HOME': self.metadata_path}) self.execute_command([self.config.git_executable, 'clean', '-f'], cwd=self.import_data_path, env={'HOME': self.metadata_path})
class HttpHook(Hook):
    """Perform an HTTP request."""
    default_body = ''
    parameters = Hook.parameters + [
        Parameter('url', required=True, help_str='requested URL [*]'),
        Parameter('method', help_str='HTTP method (default to "GET")'),
        Parameter('body', help_str='request body (empty by default) [*]'),
        Parameter('username', help_str='HTTP username [*]'),
        Parameter('password', help_str='HTTP password [*]'),
        Parameter('keyfile', help_str='client PEM key file [*]'),
        Parameter('certfile', help_str='client PEM cert file [*]'),
        Parameter('cafile', help_str='CA cert PEM file, or "ignore" to ignore invalid certificates [*]'),
        Parameter('proxy_url', help_str='Proxy URL [*]'),
        Parameter('headers', help_str='custom headers, space-separated, e.g. HEADER1=VALUE HEADER2="VA LUE"'),
    ]

    def __init__(self, name, runner, url='', method='GET', body=default_body, username=None,
                 password=None, keyfile=None, certfile=None, cafile=None, proxy_url=None,
                 headers='', **kwargs):
        super(HttpHook, self).__init__(name, runner, **kwargs)
        self.url = url
        self.method = method
        self.body = body
        self.username = username
        self.password = password
        self.keyfile = keyfile
        self.certfile = certfile
        self.cafile = cafile
        self.proxy_url = proxy_url
        self.headers = headers

    def call(self, when, cm, collect_point_results, backup_point_results):
        self.set_extra_variables(cm, collect_point_results, backup_point_results)
        kwargs = {}
        body = self.format_value(self.body)
        if body:
            kwargs['data'] = body
        keyfile, certfile = self.format_value(self.keyfile), self.format_value(self.certfile)
        if keyfile and certfile:
            kwargs['cert'] = (certfile, keyfile)
        cafile = self.format_value(self.cafile)
        if cafile == 'ignore':
            kwargs['verify'] = False
        elif cafile and os.path.isfile(cafile):
            kwargs['verify'] = cafile
        else:
            kwargs['verify'] = True
        proxy_url = self.format_value(self.proxy_url)
        if proxy_url:
            kwargs['proxies'] = {'http': proxy_url, 'https': proxy_url}
        username = self.format_value(self.username)
        password = self.format_value(self.password)
        if username and password:
            kwargs['auth'] = HTTPBasicAuth(username, password)
        headers = {}
        for splitted in shlex.split(self.format_value(self.headers)):
            header_name, sep, header_value = splitted.partition('=')
            if sep == '=':
                headers[header_name] = header_value
        if headers:
            kwargs['headers'] = headers
        url = self.format_value(self.url)
        req = requests.request(self.method, url, **kwargs)
        if req.status_code >= 300:
            self.print_error('Request %s returned a %d code' % (url, req.status_code))
        req.close()
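# Stand-alone sketch of the requests call assembled by HttpHook.call(): client
# certificates, CA bundle, basic auth, proxies and custom headers all map directly onto
# keyword arguments of requests.request(). The URL, credentials and proxy below are
# placeholders.
import requests
from requests.auth import HTTPBasicAuth


def notify(url='https://monitoring.example.org/ping', insecure=False):
    response = requests.request(
        'POST', url,
        data='backup finished',
        auth=HTTPBasicAuth('backup', 'secret'),               # basic auth
        headers={'X-Origin': 'polyarchiv'},                   # custom header
        verify=not insecure,                                  # False skips certificate checks
        proxies={'https': 'http://proxy.example.org:3128'},
    )
    if response.status_code >= 300:
        raise RuntimeError('request returned %d' % response.status_code)
    return response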
class FileRepository(CollectPoint): """Collect files from all sources in the folder 'local_path'. """ parameters = CollectPoint.parameters + [ Parameter( 'local_path', converter=check_directory, required=True, help_str='absolute path where all data are locally gathered [*]') ] METADATA_FOLDER = 'metadata' def __init__(self, name, local_path='.', **kwargs): super(FileRepository, self).__init__(name=name, **kwargs) self.local_path = local_path def pre_source_backup(self): self.ensure_dir(self.import_data_path) @cached_property def import_data_path(self): path = self.format_value(os.path.join(self.local_path, 'backups')) return path @cached_property def private_data_path(self): """where all exported data are actually stored""" return self.import_data_path @cached_property def metadata_path(self): path = os.path.join(self.local_path, self.METADATA_FOLDER, 'collect_point') path = self.format_value(path) self.ensure_dir(path) return path @cached_property def lock_filepath(self): return os.path.join(self.metadata_path, 'lock') @lru_cache() def backup_point_private_path(self, backup_point): path = os.path.join(self.local_path, self.METADATA_FOLDER, 'remote-%s' % backup_point.name) return self.format_value(path) @lru_cache() def filter_private_path(self, filter_): path = os.path.join(self.local_path, self.METADATA_FOLDER, 'filter-%s' % filter_.name) return self.format_value(path) def get_info(self): path = os.path.join(self.metadata_path, '%s.json' % self.name) self.ensure_dir(path, parent=True) if os.path.isfile(path): with codecs.open(path, 'r', encoding='utf-8') as fd: content = fd.read() return PointInfo.from_str(content) else: return PointInfo() def set_info(self, info): assert isinstance(info, PointInfo) path = os.path.join(self.metadata_path, '%s.json' % self.name) self.ensure_dir(path, parent=True) content = info.to_str() with codecs.open(path, 'w', encoding='utf-8') as fd: fd.write(content) def get_lock(self): self.ensure_dir(self.lock_filepath, parent=True) lock_ = Lock(self.lock_filepath) if lock_.acquire(timeout=1): return lock_ else: self.print_error( 'Unable to lock collect point. Check if no other backup is currently running or ' 'delete %s' % self.lock_filepath) raise ValueError def get_repository_size(self): content = subprocess.check_output(['du', '-s'], cwd=self.local_path).decode() matcher = re.match('^(\d+) \.$', content.strip()) if not matcher: return 0 return int(matcher.group(1)) def release_lock(self, lock_): lock_.release() def pre_source_restore(self): pass def post_source_restore(self): pass
class RemoteFiles(Source): """copy the remote files from the given server/source_path to the collect point. The destination is a folder inside the collect point. Require 'rsync'. """ parameters = Source.parameters + [ Parameter( 'source_url', required=True, help_str= 'synchronize data from this URL. Must ends by a folder name'), Parameter('destination_path', help_str='destination folder (like "./remote-files")', required=True), Parameter( 'private_key', help_str='private key or certificate associated to \'remote_url\'' ), Parameter('ca_cert', help_str='CA certificate associated to \'remote_url\'. ' 'Set to "any" for not checking certificates'), Parameter('ssh_options', help_str='SSH options associated to \'url\''), Parameter( 'keytab', converter=check_file, help_str= 'absolute path of the keytab file (for Kerberos authentication)'), ] def __init__(self, name, collect_point, source_url='', destination_path='', keytab=None, private_key=None, ca_cert=None, ssh_options=None, **kwargs): """ :param collect_point: collect point where files are stored :param source_url: remote folders to add to the collect point :param destination_path: relative path of the backup destination (must be a directory name, e.g. "data") """ super(RemoteFiles, self).__init__(name, collect_point, **kwargs) self.destination_path = destination_path self.source_url = source_url self.keytab = keytab self.private_key = private_key self.ca_cert = ca_cert self.ssh_options = ssh_options def backup(self): backend = self._get_backend() dirname = os.path.join(self.collect_point.import_data_path, self.destination_path) backend.sync_dir_to_local(dirname) def _get_backend(self): backend = get_backend(self.collect_point, self.source_url, keytab=self.keytab, private_key=self.private_key, ca_cert=self.ca_cert, ssh_options=self.ssh_options, config=self.config) return backend def restore(self): backend = self._get_backend() dirname = os.path.join(self.collect_point.import_data_path, self.destination_path) backend.sync_dir_from_local(dirname)
class SvnRepository(FileRepository): """Collect files from all sources in the folder 'local_path' and commit them to a remote SVN repository. """ parameters = FileRepository.parameters + [ Parameter( 'remote_url', required=True, help_str= 'URL of the remote repository (must exist). Should contain username and password [*]' ), Parameter('ca_cert', help_str='CA certificate associated to \'remote_url\'. ' 'Set to "any" for not checking certificates [*]'), Parameter( 'client_cert', help_str='Client certificate associated to \'remote_url\' [*]'), Parameter('client_cert_password', help_str='Password for encrypted client certificates [*]'), Parameter( 'commit_message', help_str= 'commit message (default: "Backup {Y}/{m}/{d} {H}:{M}") [*]'), ] checks = FileRepository.checks + [ValidSvnUrl('remote_url')] def __init__(self, name, remote_url=None, ca_cert=None, client_cert=None, client_cert_password=None, commit_message='Backup {Y}/{m}/{d} {H}:{M}', **kwargs): super(SvnRepository, self).__init__(name=name, **kwargs) remote_url, username, password = url_auth_split( self.format_value(remote_url)) self.username = username self.password = password self.ca_cert = ca_cert self.remote_url = remote_url self.client_cert = client_cert self.commit_message = commit_message self.client_cert_password = client_cert_password @cached_property def svn_folder(self): return os.path.join(self.import_data_path, '.svn') def release_lock(self, lock_): lock_.release() def pre_source_backup(self): if not os.path.isdir(self.svn_folder): cmd = [ self.config.svn_executable, 'co', '--ignore-externals', '--force', ] cmd += self.__svn_parameters() cmd += [self.remote_url, self.import_data_path] self.execute_command(cmd) def post_source_backup(self): cmd = [self.config.svn_executable, 'status'] p = subprocess.Popen(cmd, cwd=self.import_data_path, stdout=subprocess.PIPE, stderr=open(os.devnull, 'wb')) stdout, stderr = p.communicate() to_add = [] to_remove = [] for line in stdout.decode('utf-8').splitlines(): matcher = re.match( r'^([ ADMRCXI?!~])[ MC][ L][ +][ S][ KOTB][ C] (?P<name>.*)$', line) if not matcher: continue status, name = matcher.groups() if status == '?': to_add.append(name) elif status == '!': to_remove.append(name) if to_add: self.execute_command([self.config.svn_executable, 'add'] + to_add, cwd=self.import_data_path) if to_remove: self.execute_command( [self.config.svn_executable, 'rm', '--force'] + to_remove, cwd=self.import_data_path) message = self.format_value(self.commit_message) cmd = [self.config.svn_executable, 'ci', '-m', message] cmd += self.__svn_parameters() self.execute_command(cmd, cwd=self.import_data_path) def __svn_parameters(self): result = ['--non-interactive', '--no-auth-cache'] if self.username: result += ['--username', self.username] if self.password: result += ['--password', self.password] ca_cert = self.format_value(self.ca_cert) if ca_cert == 'any': result += ['--trust-server-cert'] elif ca_cert: result += [ '--config-option', 'servers:global:ssl-authority-files=%s' % ca_cert ] client_cert = self.format_value(self.client_cert) if client_cert: result += [ '--config-option', 'servers:global:ssl-client-cert-file=%s' % client_cert ] client_cert_password = self.format_value(self.client_cert_password) if client_cert_password: result += [ '--config-option', 'servers:global:ssl-client-cert-password=%s' % client_cert_password ] return result def pre_source_restore(self): self.pre_source_backup() cmd = [ self.config.svn_executable, 'up', '-r', 'HEAD', '--ignore-externals', '--force', '--accept', 
'theirs-conflict', ] cmd += self.__svn_parameters() self.execute_command(cmd, cwd=self.import_data_path)
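# Small illustration of the 'svn status' post-processing done in post_source_backup():
# unversioned entries ('?') must be added and missing entries ('!') removed before the
# commit. The parsing below only looks at the first status column and is a deliberate
# simplification of the full seven-column regex used by the class; the working-copy
# path is a placeholder.
import subprocess


def classify_svn_changes(working_copy):
    output = subprocess.check_output(['svn', 'status'], cwd=working_copy).decode('utf-8')
    to_add, to_remove = [], []
    for line in output.splitlines():
        if not line.strip():
            continue
        status, name = line[0], line[1:].strip()
        if status == '?':
            to_add.append(name)
        elif status == '!':
            to_remove.append(name)
    return to_add, to_remove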
class MySQL(Source): """Dump the content of a MySQL database with the mysqldump utility to a filename in the collect point. Require the 'mysql' and 'mysqldump' utilities. """ parameters = Source.parameters + [ Parameter('host', help_str='database host'), Parameter('port', converter=int, help_str='database port'), Parameter('sudo_user', help_str='sudo user, used for all SQL operations', converter=check_username), Parameter('user', help_str='database user'), Parameter('password', help_str='database password'), Parameter('database', help_str='name of the backuped database', required=True), Parameter( 'destination_path', help_str= 'relative path of the backup destination (e.g. "database.sql")'), Parameter( 'dump_executable', converter=check_executable, help_str='path of the mysqldump executable (default: "mysqldump")' ), Parameter('restore_executable', converter=check_executable, help_str='path of the mysql executable (default: "mysql")'), ] def __init__(self, name, collect_point, host='localhost', port='3306', user='', password='', database='', destination_path='mysql_dump.sql', sudo_user=None, dump_executable='mysqldump', restore_executable='mysql', **kwargs): super(MySQL, self).__init__(name, collect_point, **kwargs) self.sudo_user = sudo_user self.restore_executable = restore_executable self.dump_executable = dump_executable self.host = host self.port = port self.user = user self.password = password self.database = database self.destination_path = destination_path def backup(self): filename = os.path.join(self.collect_point.import_data_path, self.destination_path) self.ensure_dir(filename, parent=True) cmd = self.get_dump_cmd_list() if self.sudo_user: cmd = ['sudo', '-u', self.sudo_user] + cmd env = os.environ.copy() env.update(self.get_env()) for k, v in list(self.get_env().items()): self.print_command('%s=%s' % (k, v)) if not self.can_execute_command(cmd + ['>', filename]): filename = os.devnull # run the dump even in dry mode with open(filename, 'wb') as fd: p = subprocess.Popen(cmd, env=env, stdout=fd, stderr=self.stderr) p.communicate() if p.returncode != 0: raise subprocess.CalledProcessError(p.returncode, cmd[0]) def restore(self): filename = os.path.join(self.collect_point.import_data_path, self.destination_path) if not os.path.isfile(filename): return cmd = self.get_restore_cmd_list() if self.sudo_user: cmd = ['sudo', '-u', self.sudo_user] + cmd env = os.environ.copy() env.update(self.get_env()) for k, v in list(self.get_env().items()): self.print_command('%s=%s' % (k, v)) # noinspection PyTypeChecker with open(filename, 'rb') as fd: self.execute_command(cmd, env=env, stdin=fd, stderr=self.stderr, stdout=self.stdout) def get_dump_cmd_list(self): """ :return: :rtype: :class:`list` of :class:`str` """ command = [self.dump_executable] if self.user: command += ['--user=%s' % self.user] if self.password: command += ['--password=%s' % self.password] if self.host: command += ['--host=%s' % self.host] if self.port: command += ['--port=%s' % self.port] command += [self.database] return command def get_restore_cmd_list(self): """ :return: :rtype: :class:`list` of :class:`str` """ command = self.get_dump_cmd_list() command[0] = self.restore_executable return command def get_env(self): """Extra environment variables to be passed to shell execution""" return {}
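# Stand-alone equivalent of MySQL.backup(): run mysqldump and stream its stdout into the
# destination file. The command-line options mirror get_dump_cmd_list(); all values below
# are placeholders.
import subprocess


def dump_mysql(database, user, password, host='localhost', port='3306',
               output_path='mysql_dump.sql'):
    cmd = ['mysqldump', '--user=%s' % user, '--password=%s' % password,
           '--host=%s' % host, '--port=%s' % port, database]
    with open(output_path, 'wb') as fd:
        p = subprocess.Popen(cmd, stdout=fd)
        p.communicate()
    if p.returncode != 0:
        raise subprocess.CalledProcessError(p.returncode, cmd[0])
    return output_path

# Restoring feeds the dump back on stdin, e.g.
# subprocess.check_call(['mysql', ...], stdin=open('mysql_dump.sql', 'rb')).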
class Synchronize(CommonBackupPoint): parameters = CommonBackupPoint.parameters + [ Parameter( 'remote_url', required=True, help_str= 'synchronize data to this URL. Must ends by a folder name [*]'), Parameter( 'private_key', help_str= 'private key or certificate associated to \'remote_url\' [*]'), Parameter('ca_cert', help_str='CA certificate associated to \'remote_url\'. ' 'Set to "any" for not checking certificates [*]'), Parameter('ssh_options', help_str='SSH options associated to \'url\' [*]'), Parameter( 'keytab', help_str= 'absolute path of the keytab file (for Kerberos authentication) [*]' ), ] checks = CommonBackupPoint.checks + [ AttributeUniquess('remote_url'), FileIsReadable('private_key'), CaCertificate('ca_cert'), FileIsReadable('keytab') ] def __init__(self, name, remote_url='', keytab=None, private_key=None, ca_cert=None, ssh_options=None, **kwargs): super(Synchronize, self).__init__(name, **kwargs) self.remote_url = remote_url self.keytab = keytab self.private_key = private_key self.ca_cert = ca_cert self.ssh_options = ssh_options def do_backup(self, collect_point, export_data_path, info): backend = self._get_backend(collect_point) backend.sync_dir_from_local(export_data_path) def _get_backend(self, collect_point): remote_url = self.format_value(self.remote_url, collect_point) keytab = self.format_value(self.keytab, collect_point) private_key = self.format_value(self.private_key, collect_point) ca_cert = self.format_value(self.ca_cert, collect_point) ssh_options = self.format_value(self.ssh_options, collect_point) backend = get_backend(collect_point, remote_url, keytab=keytab, private_key=private_key, ca_cert=ca_cert, ssh_options=ssh_options, config=self.config) return backend def do_restore(self, collect_point, export_data_path): backend = self._get_backend(collect_point) backend.sync_dir_to_local(export_data_path)
class GitlabRepository(GitRepository): """Use a remote git repository and push local modifications to it. If the 'private_key' is set, then git+ssh is used for pushing data. Otherwise, use password or kerberos auth with git+http. The backup point is automatically created if required using the HTTP API provided by Gitlab. """ parameters = GitRepository.parameters[:-1] + [ Parameter( 'gitlab_url', help_str= 'HTTP URL of the gitlab server (e.g.: \'https://mygitlab.example.org/\') [*]', required=True), Parameter('project_name', help_str= 'Name of the Gitlab project (e.g. \'myuser/myproject\')[*]', required=True), Parameter( 'username', help_str= 'Username to use for pushing data. If you use git+ssh, use the SSH username' ' (often \'git\'), otherwise use your real username. [*]'), Parameter( 'password', help_str= 'Password for HTTP auth (if private_key and keytab are not set) [*]' ), Parameter( 'api_key', help_str='API key allowing for creating new repositories [*]', required=True), ] checks = GitRepository.checks + [ AttributeUniquess('project_name'), GitlabProjectName('project_name') ] def __init__(self, name, gitlab_url='', api_key=None, project_name='', username='', password='', private_key=None, **kwargs): parsed = urlparse(gitlab_url) if private_key: remote_url = '%s@%s.git' % (username, parsed.hostname) else: remote_url = '%s://%s:%s@%s/%s.git' % (parsed.scheme, username, password, parsed.hostname, project_name) # noinspection PyTypeChecker super(GitlabRepository, self).__init__(name, private_key=private_key, remote_url=remote_url, **kwargs) self.api_key = api_key self.project_name = project_name self.api_url = '%s://%s/api/v3' % (parsed.scheme, parsed.hostname) def check_remote_url(self, collect_point): project_name = self.format_value(self.project_name, collect_point) api_url = self.format_value(self.api_url, collect_point) api_key = self.format_value(self.api_key, collect_point) remote_url = self.format_value(self.remote_url, collect_point) headers = {'PRIVATE-TOKEN': api_key} r = requests.get('%s/projects/%s' % (api_url, quote_plus(project_name)), headers=headers) if r.status_code == requests.codes.ok: return True # noinspection PyTypeChecker namespace, sep, name = project_name.partition('/') data = {'name': name, 'namespace': namespace} if self.can_execute_command([ 'curl', '-X', 'POST', '-H', 'PRIVATE-TOKEN: %s' % api_key, '%s/projects/?%s' % (api_url, urlencode(data)) ]): r = requests.post('%s/projects/' % api_url, headers=headers, params=data) if r.status_code > 200: raise ValueError('Unable to create repository %s' % remote_url) # GET /projects/:id/events return True
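# Condensed version of check_remote_url() above: ask the GitLab API whether the project
# exists and create it otherwise. It targets the v3 API like the class (newer GitLab
# releases expose /api/v4 instead); URL, token and project name are placeholders.
import requests
try:
    from urllib.parse import quote_plus   # Python 3
except ImportError:
    from urllib import quote_plus         # Python 2


def ensure_gitlab_project(api_url, api_key, project_name):
    headers = {'PRIVATE-TOKEN': api_key}
    r = requests.get('%s/projects/%s' % (api_url, quote_plus(project_name)), headers=headers)
    if r.status_code == requests.codes.ok:
        return False  # the project already exists
    namespace, __, name = project_name.partition('/')
    r = requests.post('%s/projects/' % api_url, headers=headers,
                      params={'name': name, 'namespace': namespace})
    r.raise_for_status()
    return True

# ensure_gitlab_project('https://mygitlab.example.org/api/v3', 'TOKEN', 'myuser/myproject')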
class GitRepository(CommonBackupPoint): """Use a remote git repository and push local modifications to it. Can use https (with password or kerberos auth) or git+ssh remote URLs (with private key authentication). local and remote branches are always named 'master'. """ parameters = CommonBackupPoint.parameters + [ Parameter( 'keytab', help_str= 'absolute path of the keytab file (for Kerberos authentication) [*]' ), Parameter( 'private_key', help_str= 'absolute path of the private key file (for SSH key authentication) [*]' ), Parameter( 'commit_email', help_str='user email used for signing commits (default: "%s")' % DEFAULT_EMAIL), Parameter('commit_name', help_str='user name used for signing commits (default: "%s")' % DEFAULT_USERNAME), Parameter( 'commit_message', help_str= 'commit message (default: "Backup {Y}/{m}/{d} {H}:{M}") [*]'), Parameter( 'remote_url', help_str= 'URL of the remote server, including username and password (e.g.: ' 'ssh://[email protected]/project.git, file:///foo/bar/project.git or ' 'https://*****:*****@mygitlab.example.org/username/project.git). ' 'The password is not required for SSH connections (you should use SSH keys).' 'The backup point must already exists. If you created it by hand, do not ' 'forget to set \'git config --bool core.bare true\'. [*]', required=True), ] checks = CommonBackupPoint.checks + [ AttributeUniquess('remote_url'), FileIsReadable('private_key'), FileIsReadable('keytab'), Email('commit_email'), ValidGitUrl('remote_url') ] def __init__(self, name, remote_url='', remote_branch='master', private_key=None, keytab=None, commit_name=DEFAULT_USERNAME, commit_email=DEFAULT_EMAIL, commit_message='Backup {Y}/{m}/{d} {H}:{M}', **kwargs): super(GitRepository, self).__init__(name, **kwargs) self.keytab = keytab self.private_key = private_key self.remote_url = remote_url self.remote_branch = remote_branch self.commit_name = commit_name self.commit_email = commit_email self.commit_message = commit_message def do_backup(self, collect_point, export_data_path, info): assert isinstance(collect_point, CollectPoint) # just to help PyCharm worktree = export_data_path git_dir = os.path.join(self.private_path(collect_point), 'git') os.chdir(worktree) git_command = [ self.config.git_executable, '--git-dir', git_dir, '--work-tree', worktree ] self.execute_command(git_command + ['init'], cwd=worktree) self.execute_command([ self.config.git_executable, 'config', '--global', 'user.email', self.commit_email ], env={'HOME': git_dir}) self.execute_command([ self.config.git_executable, 'config', '--global', 'user.name', self.commit_name ], env={'HOME': git_dir}) self.execute_command(git_command + ['add', '.']) commit_message = self.format_value(self.commit_message, collect_point, check_metadata_requirement=False) # noinspection PyTypeChecker self.execute_command(git_command + ['commit', '-am', commit_message], ignore_errors=True, env={'HOME': git_dir}) remote_url = self.format_value(self.remote_url, collect_point) if not self.check_remote_url(collect_point): raise ValueError('Invalid backup point: %s' % remote_url) cmd = [] if self.keytab: keytab = self.format_value(self.keytab, collect_point, check_metadata_requirement=False) cmd += ['k5start', '-q', '-f', keytab, '-U', '--'] cmd += git_command + ['push', remote_url, 'master:master'] # noinspection PyTypeChecker if self.private_key and not remote_url.startswith('http'): private_key = self.format_value(self.private_key, collect_point, check_metadata_requirement=False) cmd = [ 'ssh-agent', 'bash', '-c', 'ssh-add %s ; %s' 
% (private_key, ' '.join(cmd)) ] self.execute_command(cmd, cwd=worktree, env={'HOME': git_dir}) def check_remote_url(self, collect_point): return True def do_restore(self, collect_point, export_data_path): assert isinstance(collect_point, CollectPoint) # just to help PyCharm worktree = export_data_path git_dir = os.path.join(self.private_path(collect_point), 'git') self.ensure_dir(git_dir, parent=True) self.ensure_absent(git_dir) self.ensure_dir(worktree, parent=True) self.ensure_absent(worktree) remote_url = self.format_value(self.remote_url, collect_point) cmd = [ self.config.git_executable, 'clone', '--separate-git-dir', git_dir, remote_url, worktree ] if self.keytab: keytab = self.format_value(self.keytab, collect_point, check_metadata_requirement=False) cmd += ['k5start', '-q', '-f', keytab, '-U', '--'] if self.private_key and not remote_url.startswith('http'): private_key = self.format_value(self.private_key, collect_point, check_metadata_requirement=False) cmd = [ 'ssh-agent', 'bash', '-c', 'ssh-add %s ; %s' % (private_key, ' '.join(cmd)) ] self.execute_command(cmd, cwd=os.path.dirname(worktree))
class BackupPoint(Point): constant_format_values = base_variables(use_constants=True) parameters = Point.parameters + [ Parameter( 'backup_point_tags', converter=strip_split, help_str= 'list of tags (comma-separated) associated to this backup point (default: "backup")' ), Parameter( 'included_collect_point_tags', converter=strip_split, help_str= 'any collect point with one of these tags (comma-separated) will be associated ' 'to this backup point. You can use ? or * as jokers in these tags.' ), Parameter( 'excluded_collect_point_tags', converter=strip_split, help_str= 'any collect point with one of these tags (comma-separated) will not be associated' ' to this backup point. You can use ? or * as jokers in these tags. Have precedence over ' 'included_collect_point_tags and included_backup_point_tags.'), ] checks = [] # list of callable(runner, backup_point, collect_points) def __init__(self, name, backup_point_tags=None, included_collect_point_tags=None, excluded_collect_point_tags=None, **kwargs): super(BackupPoint, self).__init__(name, **kwargs) self.backup_point_tags = [ 'backup' ] if backup_point_tags is None else backup_point_tags self.included_collect_point_tags = [ '*' ] if included_collect_point_tags is None else included_collect_point_tags self.excluded_collect_point_tags = excluded_collect_point_tags or [] self.collect_point_variables = {} # values specific to a collect_point: self.collect_point_variables[collect_point.name][key] = value # used to override remote parameters def format_value(self, value, collect_point, use_constant_values=False): if value is None: return None assert isinstance(collect_point, CollectPoint) variables = {} variables.update(self.variables) variables.update(collect_point.variables) if collect_point.name in self.collect_point_variables: variables.update(self.collect_point_variables[collect_point.name]) if use_constant_values: variables.update(self.constant_format_values) try: formatted_value = value.format(**variables) except KeyError as e: txt = text_type(e)[len('KeyError:'):] raise ValueError( 'Unable to format \'%s\': variable %s is missing' % (value, txt)) return formatted_value def backup(self, collect_point, force=False): """ perform the backup and log all errors """ self.print_info('backup point %s of collect point %s' % (self.name, collect_point.name)) info = self.get_info(collect_point) assert isinstance(info, PointInfo) assert isinstance(collect_point, CollectPoint) out_of_date = self.check_out_of_date_backup( current_time=datetime.datetime.now(), previous_time=info.last_success) if not (force or out_of_date): # the last previous backup is still valid # => nothing to do self.print_success( 'last backup (%s) is still valid. No backup to do.' % info.last_success) return True elif info.last_success is None: self.print_info('no previous backup: a new backup is required.') elif out_of_date: self.print_info('last backup (%s) is out-of-date.' % str(info.last_success)) elif force: self.print_info( 'last backup (%s) is still valid but a new backup is forced.' 
% str(info.last_success)) lock_ = None # collect only (but all) variables that are related to host and time info.variables = { k: v for (k, v) in list(collect_point.variables.items()) if k in self.constant_format_values } # these variables are required for a valid restore cwd = os.getcwd() try: if self.can_execute_command('# get lock'): lock_ = collect_point.get_lock() export_data_path = self.apply_backup_filters(collect_point) self.do_backup(collect_point, export_data_path, info) info.success_count += 1 info.last_state_valid = True info.last_success = datetime.datetime.now() info.last_message = 'ok' except Exception as e: self.print_error('unable to perform backup: %s' % text_type(e)) info.fail_count += 1 info.last_fail = datetime.datetime.now() info.last_state_valid = False info.last_message = text_type(e) finally: os.chdir(cwd) if lock_ is not None: try: if self.can_execute_command('# release lock'): collect_point.release_lock(lock_) except Exception as e: self.print_error('unable to release lock. %s' % text_type(e)) if self.can_execute_command('# register this backup point state'): self.set_info(collect_point, info) return info.last_state_valid def do_backup(self, collect_point, export_data_path, info): """send backup data from the collect point :param collect_point: the collect point :param export_data_path: where all data are stored (path) :param info: PointInfo object. its attribute `data` can be freely updated """ raise NotImplementedError def apply_backup_filters(self, collect_point): assert isinstance(collect_point, CollectPoint) next_path = collect_point.export_data_path for filter_ in self.filters: assert isinstance(filter_, FileFilter) next_path = filter_.backup(next_path, self.filter_private_path( collect_point, filter_), allow_in_place=False) return next_path def apply_restore_filters(self, collect_point): assert isinstance(collect_point, CollectPoint) next_path = collect_point.export_data_path filter_data = [] for filter_ in self.filters: assert isinstance(filter_, FileFilter) filter_data.append((filter_, next_path)) next_path = filter_.next_path(next_path, self.filter_private_path( collect_point, filter_), allow_in_place=False) for filter_, next_path in reversed(filter_data): assert isinstance(filter_, FileFilter) filter_.restore(next_path, self.filter_private_path(collect_point, filter_), allow_in_place=False) # noinspection PyMethodMayBeStatic def get_info(self, collect_point, force_backup=False): assert isinstance(collect_point, CollectPoint) path = os.path.join(self.private_path(collect_point), '%s.json' % self.name) if os.path.isfile(path): with codecs.open(path, 'r', encoding='utf-8') as fd: content = fd.read() return PointInfo.from_str(content) else: return PointInfo() # noinspection PyMethodMayBeStatic def set_info(self, collect_point, info): assert isinstance(collect_point, CollectPoint) assert isinstance(info, PointInfo) path = os.path.join(self.private_path(collect_point), '%s.json' % self.name) self.ensure_dir(path, parent=True) content = info.to_str() with codecs.open(path, 'w', encoding='utf-8') as fd: fd.write(content) def restore(self, collect_point): info = self.get_info(collect_point, force_backup=True) assert isinstance(collect_point, CollectPoint) assert isinstance(info, PointInfo) collect_point.variables.update(info.variables) next_path = collect_point.export_data_path for filter_ in self.filters: assert isinstance(filter_, FileFilter) next_path = filter_.next_path(next_path, self.filter_private_path( collect_point, filter_), allow_in_place=False) 
self.do_restore(collect_point, next_path) self.apply_restore_filters(collect_point) def do_restore(self, collect_point, export_data_path): raise NotImplementedError @lru_cache() def private_path(self, collect_point): assert isinstance(collect_point, CollectPoint) return os.path.join(collect_point.backup_point_private_path(self), 'remote') @lru_cache() def filter_private_path(self, collect_point, filter_): assert isinstance(collect_point, CollectPoint) assert isinstance(filter_, FileFilter) return os.path.join(collect_point.backup_point_private_path(self), 'filter-%s' % filter_.name) def execute_hook(self, when, cm, collect_point, result=None): assert isinstance(collect_point, CollectPoint) result_ = {(self.name, collect_point.name): result} for hook in self.hooks: assert isinstance(hook, Hook) if when in hook.hooked_events: hook.call(when, cm, {collect_point.name: True}, result_)
class CommonBackupPoint(BackupPoint):
    """A BackupPoint with meaningful implementations for set_info/get_info"""
    parameters = BackupPoint.parameters + [
        Parameter('metadata_url', required=False,
                  help_str='send metadata (about the successful last backup) to this URL. '
                           'Should end with "/" or use the {name} variable [**]'),
        Parameter('metadata_private_key', help_str='private key associated to \'metadata_url\' [**]'),
        Parameter('metadata_ca_cert', help_str='private certificate associated to \'metadata_url\' [**]'),
        Parameter('metadata_keytab',
                  help_str='keytab (for Kerberos authentication) associated to \'metadata_url\' [**]'),
        Parameter('metadata_ssh_options', help_str='SSH options associated to \'metadata_url\' [**]'),
    ]
    checks = BackupPoint.checks + [AttributeUniquess('metadata_url'),
                                   FileIsReadable('metadata_private_key'),
                                   FileIsReadable('metadata_keytab'),
                                   CaCertificate('metadata_ca_cert')]

    def __init__(self, name, metadata_url=None, metadata_private_key=None, metadata_ca_cert=None,
                 metadata_keytab=None, metadata_ssh_options=None, **kwargs):
        super(CommonBackupPoint, self).__init__(name, **kwargs)
        self.metadata_url = metadata_url
        self.metadata_private_key = metadata_private_key
        self.metadata_ca_cert = metadata_ca_cert
        self.metadata_keytab = metadata_keytab
        self.metadata_ssh_options = metadata_ssh_options
        self.metadata_url_requirements = []  # list of values using non-constant values

    def format_value(self, value, collect_point, use_constant_values=False, check_metadata_requirement=True):
        """Check if the metadata_url is required: at least one formatted value uses non-constant values"""
        if use_constant_values:
            return super(CommonBackupPoint, self).format_value(value, collect_point, use_constant_values)
        result = super(CommonBackupPoint, self).format_value(value, collect_point, False)
        if check_metadata_requirement:
            constant_result = super(CommonBackupPoint, self).format_value(value, collect_point, True)
            if constant_result != result:
                self.metadata_url_requirements.append(value)
        return result

    def do_restore(self, collect_point, export_data_path):
        raise NotImplementedError

    def do_backup(self, collect_point, export_data_path, info):
        raise NotImplementedError

    def _get_metadata_backend(self, collect_point):
        assert isinstance(collect_point, CollectPoint)
        if self.metadata_url is None:
            p1 = 's' if len(self.metadata_url_requirements) > 1 else ''
            p2 = '' if len(self.metadata_url_requirements) > 1 else 's'
            if self.metadata_url_requirements:
                self.print_error('value%s "%s" use%s time/host-dependent variables. '
                                 'You should define the "metadata_url" parameter to ease restore operation'
                                 % (p1, ', '.join(self.metadata_url_requirements), p2))
            return None
        metadata_url = self.format_value(self.metadata_url, collect_point, use_constant_values=True)
        if metadata_url.endswith('/'):
            metadata_url += '%s.json' % collect_point.name
        metadata_private_key = self.format_value(self.metadata_private_key, collect_point,
                                                 use_constant_values=True)
        metadata_ca_cert = self.format_value(self.metadata_ca_cert, collect_point, use_constant_values=True)
        metadata_keytab = self.format_value(self.metadata_keytab, collect_point, use_constant_values=True)
        metadata_ssh_options = self.format_value(self.metadata_ssh_options, collect_point,
                                                 use_constant_values=True)
        backend = get_backend(self, metadata_url, keytab=metadata_keytab, private_key=metadata_private_key,
                              ca_cert=metadata_ca_cert, ssh_options=metadata_ssh_options, config=self.config)
        assert isinstance(backend, StorageBackend)
        return backend

    @lru_cache()
    def get_info(self, collect_point, force_backup=False):
        assert isinstance(collect_point, CollectPoint)
        path = os.path.join(self.private_path(collect_point), '%s.json' % self.name)
        if not os.path.isfile(path) or force_backup:
            self.ensure_dir(path, parent=True)
            backend = self._get_metadata_backend(collect_point)
            if backend is not None:
                # noinspection PyBroadException
                try:
                    backend.sync_file_to_local(path)
                except:  # happens on the first sync (no remote data available)
                    pass
        if os.path.isfile(path) and not force_backup:
            with codecs.open(path, 'r', encoding='utf-8') as fd:
                content = fd.read()
            return PointInfo.from_str(content)
        return PointInfo()

    def set_info(self, collect_point, info):
        assert isinstance(collect_point, CollectPoint)
        assert isinstance(info, PointInfo)
        path = os.path.join(self.private_path(collect_point), '%s.json' % self.name)
        self.ensure_dir(path, parent=True)
        content = info.to_str()
        with codecs.open(path, 'w', encoding='utf-8') as fd:
            fd.write(content)
        backend = self._get_metadata_backend(collect_point)
        if backend is not None:
            backend.sync_file_from_local(path)
class TarArchive(CommonBackupPoint): """Gather all files of your collect point into a .tar archive (.tar.gz, .tar.bz2 or .tar.xz) and copy it to the remote URL. """ excluded_files = {'.git', '.gitignore'} parameters = CommonBackupPoint.parameters + [ Parameter( 'remote_url', required=True, help_str= 'synchronize data to this URL, like \'ssh://user@hostname/folder/archive.tar.gz\'. ' 'Must end by ".tar.gz", "tar.bz2", "tar.xz" [*]'), Parameter( 'private_key', help_str= 'private key or certificate associated to \'remote_url\' [*]'), Parameter('ca_cert', help_str='CA certificate associated to \'remote_url\'. ' 'Set to "any" for not checking certificates [*]'), Parameter('ssh_options', help_str='SSH options associated to \'url\' [*]'), Parameter( 'keytab', help_str= 'absolute path of the keytab file (for Kerberos authentication) [*]' ), ] checks = CommonBackupPoint.checks + [ AttributeUniquess('remote_url'), FileIsReadable('private_key'), CaCertificate('ca_cert'), FileIsReadable('keytab') ] def __init__(self, name, remote_url='', keytab=None, private_key=None, ca_cert=None, ssh_options=None, **kwargs): super(TarArchive, self).__init__(name, **kwargs) self.remote_url = remote_url self.keytab = keytab self.private_key = private_key self.ca_cert = ca_cert self.ssh_options = ssh_options def _get_backend(self, collect_point): remote_url = self.format_value(self.remote_url, collect_point) keytab = self.format_value(self.keytab, collect_point) private_key = self.format_value(self.private_key, collect_point) ca_cert = self.format_value(self.ca_cert, collect_point) ssh_options = self.format_value(self.ssh_options, collect_point) backend = get_backend(collect_point, remote_url, keytab=keytab, private_key=private_key, ca_cert=ca_cert, ssh_options=ssh_options, config=self.config) return backend def do_backup(self, collect_point, export_data_path, info): assert isinstance(collect_point, CollectPoint) backend = self._get_backend(collect_point) remote_url = self.format_value(self.remote_url, collect_point) archive_filename = self.archive_name_prefix(collect_point) if remote_url.endswith('tar.gz'): archive_filename += '.tar.gz' cmd = [self.config.tar_executable, '-czf', archive_filename] elif remote_url.endswith('tar.bz2'): archive_filename += '.tar.bz2' cmd = [self.config.tar_executable, '-cjf', archive_filename] elif remote_url.endswith('tar.xz'): archive_filename += '.tar.xz' cmd = [self.config.tar_executable, '-cJf', archive_filename] else: raise ValueError('invalid tar format: %s' % remote_url) filenames = os.listdir(export_data_path) filenames.sort() cmd += filenames returncode, stdout, stderr = self.execute_command(cmd, cwd=export_data_path, ignore_errors=True) error = None if returncode != 0: error = ValueError('unable to create archive %s' % archive_filename) else: try: backend.sync_file_from_local(archive_filename) except Exception as e: error = e self.ensure_absent(archive_filename) if error is not None: raise error def archive_name_prefix(self, collect_point): return os.path.join(self.private_path(collect_point), 'archive') def do_restore(self, collect_point, export_data_path): assert isinstance(collect_point, CollectPoint) backend = self._get_backend(collect_point) remote_url = self.format_value(self.remote_url, collect_point) archive_filename = self.archive_name_prefix(collect_point) if remote_url.endswith('tar.gz'): archive_filename += '.tar.gz' elif remote_url.endswith('tar.bz2'): archive_filename += '.tar.bz2' elif remote_url.endswith('tar.xz'): archive_filename += '.tar.xz' else: raise 
ValueError('invalid tar format: %s' % remote_url) backend.sync_file_to_local(archive_filename) self.ensure_dir(export_data_path) self.execute_command([ self.config.tar_executable, '-C', export_data_path, '-xf', archive_filename ])
class Ldap(Source): """Dump a OpenLDAP database using slapcat to a filename in the collect point. Must be run on the LDAP server with a sudoer account (or 'root'). Require the 'slapcat' and 'slapadd' utilities. """ parameters = Source.parameters + [ Parameter('destination_path', help_str='filename of the dump (not an absolute path)'), Parameter('use_sudo', help_str='use sudo to perform the dump (yes/no)', converter=bool_setting), Parameter( 'data_directory', help_str='your LDAP base (if you want to restrict the dump)'), Parameter( 'ldap_base', help_str='your LDAP base dn (if you want to restrict the dump)'), Parameter('database', help_str='database number (default: 1)', converter=int), Parameter( 'dump_executable', converter=check_executable, help_str='path of the slapcat executable (default: "slapcat")'), Parameter( 'restore_executable', converter=check_executable, help_str='path of the slapadd executable (default: "slapadd")'), ] def __init__(self, name, collect_point, destination_path='ldap.ldif', dump_executable='slapcat', use_sudo=False, restore_executable='slapadd', database=1, ldap_base=None, **kwargs): super(Ldap, self).__init__(name, collect_point, **kwargs) self.destination_path = destination_path self.dump_executable = dump_executable self.restore_executable = restore_executable self.use_sudo = use_sudo self.ldap_base = ldap_base self.database = database def backup(self): filename = os.path.join(self.collect_point.import_data_path, self.destination_path) self.ensure_dir(filename, parent=True) cmd = [] if self.use_sudo: cmd += ['sudo'] cmd += [self.dump_executable] if self.ldap_base: cmd += ['-b', self.ldap_base] cmd += ['-n', str(self.database)] self.execute_command(cmd) if not self.can_execute_command(cmd + ['>', filename]): filename = os.devnull # run the dump even in dry mode with open(filename, 'wb') as fd: p = subprocess.Popen(cmd, stdout=fd, stderr=self.stderr) p.communicate() def restore(self): filename = os.path.join(self.collect_point.import_data_path, self.destination_path) if not os.path.isfile(filename): return prefix = [] if self.use_sudo: prefix += ['sudo'] # identify the database folder p = subprocess.Popen(prefix + [self.dump_executable, '-n', '0'], stdout=subprocess.PIPE, stderr=self.stderr) stdout, __ = p.communicate() database_folder = self.get_database_folder(io.BytesIO(stdout), str(self.database)) if database_folder is None: raise IOError('Unable to find database folder for database %s' % self.database) stat_info = os.stat(database_folder) uid = stat_info.st_uid gid = stat_info.st_gid user = pwd.getpwuid(uid)[0] group = grp.getgrgid(gid)[0] self.execute_command(prefix + ['service', 'slapd', 'stop']) self.execute_command(prefix + ['rm', '-rf', database_folder]) self.execute_command(prefix + ['mkdir', '-p', database_folder]) self.execute_command(prefix + [ self.restore_executable, '-l', filename, ]) self.execute_command( prefix + ['chown', '-R', '%s:%s' % (user, group), database_folder]) self.execute_command(prefix + ['service', 'slapd', 'start']) @staticmethod def get_database_folder(ldif_config, database_number): parser = LDIFParser(ldif_config) regexp = re.compile('^olcDatabase=\{%s\}(.*),cn=config$' % database_number) for dn, entry in parser.parse(): if not regexp.match(dn): continue return entry.get('olcDbDirectory', [None])[0] return None
class RollingTarArchive(TarArchive):
    """Gather all files of your collect point into a .tar archive (.tar.gz, .tar.bz2 or .tar.xz)
    and copy it to the remote URL.
    Also tracks previous archives to only keep a given number of hourly/daily/weekly/yearly backups,
    deleting unneeded ones.
    """
    parameters = TarArchive.parameters + [
        Parameter('hourly_count', converter=int, default_str_value='1',
                  help_str='Number of hourly backups to keep (default to 1)'),
        Parameter('daily_count', converter=int, default_str_value='30',
                  help_str='Number of daily backups to keep (default to 30)'),
        Parameter('weekly_count', converter=int, default_str_value='10',
                  help_str='Number of weekly backups to keep (default to 10)'),
        Parameter('yearly_count', converter=int, default_str_value='20',
                  help_str='Number of yearly backups to keep (default to 20)'),
    ]
    for index, parameter in enumerate(parameters):
        if parameter.arg_name == 'remote_url':
            parameters[index] = Parameter(
                'remote_url', required=True,
                help_str='synchronize data to this URL (SHOULD DEPEND ON THE DATE AND TIME): '
                         '\'file:///var/backup/archive-{Y}-{m}-{d}_{H}-{M}.tar.gz\'. '
                         'Must end with ".tar.gz", ".tar.bz2" or ".tar.xz" [*]')
            break

    def __init__(self, name, hourly_count=1, daily_count=30, weekly_count=10, yearly_count=20, **kwargs):
        super(RollingTarArchive, self).__init__(name, **kwargs)
        self.hourly_count = hourly_count
        self.daily_count = daily_count
        self.weekly_count = weekly_count
        self.yearly_count = yearly_count

    def do_backup(self, collect_point, export_data_path, info):
        super(RollingTarArchive, self).do_backup(collect_point, export_data_path, info)
        if info.data is None:
            info.data = []
        # info.data must be a list of dict (old values)
        info.data.append(info.variables)
        if self.can_execute_command('# register this backup point state'):
            info.last_state_valid = True
            info.last_success = datetime.datetime.now()
            self.set_info(collect_point, info)
        # ok, now we have to check which old backups must be removed
        values = []
        time_to_values = {}
        # noinspection PyTypeChecker
        for value_dict in info.data:
            d = datetime.datetime(year=int(value_dict['Y']), month=int(value_dict['m']),
                                  day=int(value_dict['d']), hour=int(value_dict['H']),
                                  minute=int(value_dict['M']), second=int(value_dict['S']))
            values.append(d)
            time_to_values[d] = value_dict
        values.sort(reverse=True)
        times = OrderedDict()
        for d in values:
            times[d] = False
        now = datetime.datetime.now()
        if self.hourly_count:
            times = self.set_accepted_times(datetime.timedelta(hours=1), times,
                                            not_before_time=now - datetime.timedelta(hours=self.hourly_count))
        if self.daily_count:
            times = self.set_accepted_times(datetime.timedelta(days=1), times,
                                            not_before_time=now - datetime.timedelta(days=self.daily_count))
        if self.weekly_count:
            times = self.set_accepted_times(datetime.timedelta(days=7), times,
                                            not_before_time=now - datetime.timedelta(days=self.weekly_count * 7))
        if self.yearly_count:
            times = self.set_accepted_times(datetime.timedelta(days=365), times,
                                            not_before_time=now - datetime.timedelta(days=self.yearly_count * 365))
        to_remove_values = [d for (d, v) in list(times.items()) if not v]
        to_keep_values = [d for (d, v) in list(times.items()) if v]
        info.data = [time_to_values[d] for d in reversed(to_keep_values)]
        for data in to_remove_values:
            collect_point.variables = time_to_values[data]
            backend = self._get_backend(collect_point)
            backend.delete_on_distant()

    @staticmethod
    def set_accepted_times(min_accept_interval, ordered_times, not_before_time=None, not_after_time=None):
        """Require at least one `True` value in `ordered_times` for each `min_accept_interval`
        between `not_before_time` and `not_after_time`.

        :param min_accept_interval: at least one True value is required in this interval
        :param ordered_times: an OrderedDict with datetime keys and boolean values
        :param not_before_time: any key smaller than it is ignored
        :param not_after_time: any key greater than it is ignored

        >>> times = OrderedDict()
        >>> times[0] = False
        >>> times[3] = False
        >>> times[4] = False
        >>> times[5] = False
        >>> times[7] = False
        >>> times[8] = False
        >>> times[9] = False
        >>> result = RollingTarArchive.set_accepted_times(3, times, not_after_time=14)
        >>> print(result)
        OrderedDict([(0, True), (3, True), (4, False), (5, False), (7, True), (8, False), (9, False)])

        """
        assert isinstance(ordered_times, OrderedDict)
        previous_time = None
        result = OrderedDict()
        for current_time, state in list(ordered_times.items()):
            if not_before_time is not None and current_time < not_before_time:
                result[current_time] = state
            elif not_after_time is not None and current_time > not_after_time:
                result[current_time] = state
            elif previous_time is None:
                result[current_time] = True
            elif abs(previous_time - current_time) >= min_accept_interval:
                result[current_time] = True
            else:
                result[current_time] = state
            if result[current_time]:
                previous_time = current_time
        return result

    def archive_name_prefix(self, collect_point):
        archive_name = self.format_value('archive-{Y}-{m}-{d}_{H}-{M}', collect_point)
        return os.path.join(self.private_path(collect_point), archive_name)
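# Worked example (illustrative, not part of the original module): how set_accepted_times()
# thins out a series of backups. Integer keys stand in for datetimes, as in the doctest above;
# a key is kept (True) when it is the first one seen at least `min_accept_interval` after the
# previously kept key.
#
#   >>> times = OrderedDict((h, False) for h in range(0, 24, 2))
#   >>> kept = RollingTarArchive.set_accepted_times(6, times, not_after_time=23)
#   >>> [h for h, keep in kept.items() if keep]
#   [0, 6, 12, 18]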
class EmailHook(Hook):
    """Send an email to one or more recipients when the hook is called.
    Some extra variables are available:

      * "status" ('--' for "before_backup" hooks, 'OK' or 'KO' otherwise),
      * "detailed_status" (one 'OK'/'KO' line per backup or collect point, empty for "before_backup" hooks),
      * "complete_log" (the complete stdout log).

    """
    default_content = "{detailed_status}\n\n{complete_log}"
    default_subject = "[BACKUP][{fqdn}] {Y}/{m}/{d} {H}:{M} [{status}]"
    parameters = Hook.parameters + [
        Parameter('recipient', required=True, help_str='recipients, separated by commas [*]'),
        Parameter('subject', help_str='subject (default to "%s") [*]' % default_subject),
        Parameter('content', help_str='mail content (default to "%s") [*]' % default_content),
        Parameter('sender', help_str='from address (default to %s) [*]' % DEFAULT_EMAIL),
        Parameter('hostname', help_str='SMTP server name (default to "localhost")'),
        Parameter('port', help_str='SMTP server port', converter=int),
        Parameter('username', help_str='SMTP client username'),
        Parameter('password', help_str='SMTP client password'),
        Parameter('keyfile', help_str='client PEM key file'),
        Parameter('certfile', help_str='client PEM cert file'),
        Parameter('encryption', help_str='encryption method ("none", "starttls" or "tls")',
                  converter=CheckOption(["none", "starttls", "tls"])),
    ]

    def __init__(self, name, runner, recipient='', subject=default_subject, content=default_content,
                 sender=DEFAULT_EMAIL, hostname='localhost', port=0, username=None, password=None,
                 keyfile=None, certfile=None, encryption="none", **kwargs):
        super(EmailHook, self).__init__(name, runner, **kwargs)
        self.recipient = recipient
        self.subject = subject
        self.content = content
        self.sender = sender
        self.hostname = hostname
        self.port = port
        self.username = username
        self.password = password
        self.keyfile = keyfile
        self.certfile = certfile
        self.encryption = encryption

    def call(self, when, cm, collect_point_results, backup_point_results):
        self.set_extra_variables(cm, collect_point_results, backup_point_results)
        msg = MIMEText(self.format_value(self.content))
        msg['Subject'] = self.format_value(self.subject)
        msg['From'] = self.format_value(self.sender)
        msg['To'] = self.format_value(self.recipient)
        # send the message via the configured SMTP server
        if self.encryption == "tls":
            smtp = smtplib.SMTP_SSL(host=self.hostname, port=self.port,
                                    keyfile=self.keyfile, certfile=self.certfile)
        else:
            smtp = smtplib.SMTP(host=self.hostname, port=self.port)
        if self.encryption == 'starttls':
            smtp.starttls(keyfile=self.keyfile, certfile=self.certfile)
        if self.username and self.password:
            smtp.login(self.username, self.password)
        smtp.send_message(msg)
        smtp.quit()
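# Illustration (not part of the original module): with the default templates and hypothetical
# values fqdn="backup.example.com" for a run on 2016-06-15 at 04:00 where every collect and
# backup point succeeded, the sent mail would look like:
#
#   Subject: [BACKUP][backup.example.com] 2016/06/15 04:00 [OK]
#   Body:    the "detailed_status" lines, a blank line, then the "complete_log" text.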
class Dovecot(Source):
    """Backup and restore Dovecot mailboxes with the 'doveadm backup' command, into a directory
    of the collect point. Require the 'doveadm' utility."""
    parameters = Source.parameters + [
        Parameter('destination_path', help_str='dirname of the dump (not an absolute path)'),
        Parameter('mailbox', help_str='only sync this mailbox name'),
        Parameter('socket', help_str='either an absolute path to a local UNIX domain socket, or a hostname and'
                                     ' port (hostname:port), to connect to a remote host via a TCP socket.'),
        Parameter('user_mask', help_str='only sync this user ("*" and "?" wildcards can be used).'),
        Parameter('dump_executable', converter=check_executable,
                  help_str='path of the doveadm executable (default: "doveadm")'),
    ]

    def __init__(self, name, collect_point, destination_path='dovecot', dump_executable='doveadm',
                 mailbox=None, user_mask=None, socket=None, **kwargs):
        super(Dovecot, self).__init__(name, collect_point, **kwargs)
        self.socket = socket
        self.destination_path = destination_path
        self.dump_executable = dump_executable
        self.mailbox = mailbox
        self.user_mask = user_mask

    def backup(self):
        self.perform_action(restore=False)

    def restore(self):
        self.perform_action(restore=True)

    def perform_action(self, restore):
        dirname = os.path.join(self.collect_point.import_data_path, self.destination_path)
        self.ensure_dir(dirname)
        cmd = [self.dump_executable, 'backup']
        if restore:
            cmd += ['-R']
        if self.mailbox:
            cmd += ['-m', self.mailbox]
        if self.socket:
            cmd += ['-S', self.socket]
        if self.user_mask is None:
            cmd += ['-A']
        else:
            cmd += ['-u', self.user_mask]
        cmd += [dirname]
        self.execute_command(cmd)
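# Illustration (not part of the original module): commands built by perform_action(), assuming
# hypothetical settings mailbox="INBOX", user_mask="jane*" and destination_path="dovecot", with a
# collect point whose import_data_path is "/var/backups/collect":
#
#   backup:   doveadm backup -m INBOX -u jane* /var/backups/collect/dovecot
#   restore:  doveadm backup -R -m INBOX -u jane* /var/backups/collect/dovecot
#
# When no user_mask is given, '-A' (all users) is passed instead of '-u'.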
class Hook(ParameterizedObject):
    parameters = ParameterizedObject.parameters + [
        Parameter('events', converter=strip_split, required=True,
                  help_str='list of events (comma-separated) that trigger this hook: "before_backup", '
                           '"backup_success", "backup_error", "after_backup".'),
    ]
    keep_output = True

    def __init__(self, name, runner, parameterized_object, events=None, **kwargs):
        super(Hook, self).__init__(name, **kwargs)
        assert isinstance(parameterized_object, ParameterizedObject)
        self.runner = runner
        self.parameterized_object = parameterized_object
        self.hooked_events = set(events)

    def stderr(self):
        return self.runner.stderr

    def stdout(self):
        return self.runner.stdout

    def print_message(self, *args, **kwargs):
        return self.runner.print_message(*args, **kwargs)

    def call(self, when, cm, collect_point_results, backup_point_results):
        assert isinstance(when, text_type)
        assert isinstance(cm, FileContentMonitor)
        assert isinstance(collect_point_results, dict)  # dict[collect_point.name] = True/False
        assert isinstance(backup_point_results, dict)  # dict[(backup_point.name, collect_point.name)] = True/False
        raise NotImplementedError

    def set_extra_variables(self, cm, collect_point_results, backup_point_results):
        self.variables.update(self.parameterized_object.variables)
        assert isinstance(cm, FileContentMonitor)
        content = cm.get_text_content()
        if not collect_point_results and not backup_point_results:
            self.variables['status'] = '--'
        elif all(collect_point_results.values()) and all(backup_point_results.values()):
            self.variables['status'] = 'OK'
        else:
            self.variables['status'] = 'KO'
        text_values = {True: 'OK', False: 'KO'}
        detailed_status = ['%s: %s' % (text_values[collect_point_results[key]], key)
                           for key in sorted(collect_point_results)]
        detailed_status += ['%s: %s on %s' % (text_values[backup_point_results[key]], key[0], key[1])
                            for key in sorted(backup_point_results)]
        self.variables.update({'complete_log': content, 'detailed_status': '\n'.join(detailed_status)})
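# Illustration (not part of the original module): given hypothetical results
#   collect_point_results = {'website': True, 'database': False}
#   backup_point_results = {('s3', 'website'): True}
# set_extra_variables() sets "status" to 'KO' (at least one failure) and "detailed_status" to:
#   KO: database
#   OK: website
#   OK: s3 on website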
class LocalFiles(Source):
    """Copy all files from the given source_path to the collect point using 'rsync'.
    The destination is a folder inside the collect point.
    """
    parameters = Source.parameters + [
        Parameter('source_path', converter=check_directory, help_str='original folder to backup', required=True),
        Parameter('destination_path', help_str='destination folder (relative path, e.g. "./files")', required=True),
        Parameter('exclude', help_str='exclude files matching PATTERN (see --exclude option from rsync). '
                                      'If PATTERN starts with @, then it should be the absolute path of a file '
                                      '(see --exclude-from option from rsync)'),
        Parameter('include', help_str='only include files matching PATTERN (see --include option from rsync). '
                                      'If PATTERN starts with @, then it should be the absolute path of a file '
                                      '(see --include-from option from rsync)'),
        Parameter('preserve_hard_links', converter=bool_setting, help_str='true|false: preserve hard links'),
    ]

    def __init__(self, name, collect_point, source_path='', destination_path='', exclude='', include='',
                 preserve_hard_links='', **kwargs):
        """
        :param collect_point: collect point where files are stored
        :param source_path: absolute path of a directory to backup
        :param destination_path: relative path of the backup destination (must be a directory name, e.g. "data")
        :param exclude: exclude files matching PATTERN. If PATTERN starts with '@', it must be the absolute path
            of a file (cf. the --exclude-from option from rsync)
        :param include: don't exclude files matching PATTERN. If PATTERN starts with '@', it must be the absolute
            path of a file (cf. the --include-from option from rsync)
        :param preserve_hard_links: preserve hard links
        """
        super(LocalFiles, self).__init__(name, collect_point, **kwargs)
        self.source_path = source_path
        self.destination_path = destination_path
        self.exclude = exclude
        self.include = include
        if isinstance(preserve_hard_links, bool):  # already converted by bool_setting
            self.preserve_hard_links = preserve_hard_links
        else:
            self.preserve_hard_links = preserve_hard_links.lower().strip() in ('yes', 'true', 'on', '1')

    def backup(self):
        cmd = [self.config.rsync_executable, '-a', '--delete', '-S']
        if self.preserve_hard_links:
            cmd.append('-H')
        # noinspection PyTypeChecker
        if self.exclude and self.exclude.startswith('@'):
            cmd += ['--exclude-from', self.exclude[1:]]
        elif self.exclude:
            cmd += ['--exclude', self.exclude]
        # noinspection PyTypeChecker
        if self.include and self.include.startswith('@'):
            cmd += ['--include-from', self.include[1:]]
        elif self.include:
            cmd += ['--include', self.include]
        dirname = os.path.join(self.collect_point.import_data_path, self.destination_path)
        self.ensure_dir(dirname)
        source = self.source_path
        if not source.endswith(os.path.sep):
            source += os.path.sep
        if not dirname.endswith(os.path.sep):
            dirname += os.path.sep
        cmd += [source, dirname]
        self.execute_command(cmd)

    def restore(self):
        cmd = [self.config.rsync_executable, '-a', '--delete', '-S']
        if self.preserve_hard_links:
            cmd.append('-H')
        dirname = os.path.join(self.collect_point.import_data_path, self.destination_path)
        source = self.source_path
        self.ensure_dir(dirname)
        self.ensure_dir(source)
        if not source.endswith(os.path.sep):
            source += os.path.sep
        if not dirname.endswith(os.path.sep):
            dirname += os.path.sep
        cmd += [dirname, source]
        self.execute_command(cmd)
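# Illustration (not part of the original module): the rsync invocation built by backup(), assuming
# the rsync executable is "rsync" and hypothetical settings source_path="/var/www",
# destination_path="files", exclude="*.pyc" and preserve_hard_links=true, with an
# import_data_path of "/var/backups/collect":
#
#   rsync -a --delete -S -H --exclude *.pyc /var/www/ /var/backups/collect/files/
#
# restore() runs the same command with source and destination swapped.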
class CollectPoint(Point):
    """Collect point, made of one or more sources.
    Each source is run and contributes to the new backup.
    """
    parameters = Point.parameters + [
        Parameter('collect_point_tags', converter=strip_split,
                  help_str='list of tags (comma-separated) associated to this collect point. Default: "collect"'),
        Parameter('included_backup_point_tags', converter=strip_split,
                  help_str='any backup point with one of these tags (comma-separated) will be associated '
                           'to this collect point. You can use ? or * as jokers in these tags. Default: "*"'),
        Parameter('excluded_backup_point_tags', converter=strip_split,
                  help_str='any backup point with one of these tags (comma-separated) will not be associated'
                           ' to this collect point. You can use ? or * as jokers in these tags. Takes precedence'
                           ' over included_backup_point_tags.'),
    ]
    checks = []  # list of callable(runner, collect_point, backup_points)

    def __init__(self, name, collect_point_tags=None, included_backup_point_tags=None,
                 excluded_backup_point_tags=None, **kwargs):
        super(CollectPoint, self).__init__(name=name, **kwargs)
        self.collect_point_tags = ['collect'] if collect_point_tags is None else collect_point_tags
        self.included_backup_point_tags = ['*'] if included_backup_point_tags is None \
            else included_backup_point_tags
        self.excluded_backup_point_tags = excluded_backup_point_tags or []
        self.sources = []

    def backup(self, force=False):
        """perform the backup and log all errors"""
        self.print_info('backup of collect point %s' % self.name)
        info = self.get_info()
        assert isinstance(info, PointInfo)
        out_of_date = self.check_out_of_date_backup(current_time=datetime.datetime.now(),
                                                    previous_time=info.last_success)
        if not (force or out_of_date):
            # the previous backup is still valid => nothing to do
            self.print_success('last backup (%s) is still valid. No backup to do.' % info.last_success)
            return True
        elif info.last_success is None:
            self.print_info('no previous backup: a new backup is required.')
        elif out_of_date:
            self.print_info('last backup (%s) is out-of-date.' % str(info.last_success))
        elif force:
            self.print_info('last backup (%s) is still valid but a new backup is forced.' % str(info.last_success))
        lock_ = None
        cwd = os.getcwd()
        try:
            if self.can_execute_command(''):
                lock_ = self.get_lock()
            self.pre_source_backup()
            for source in self.sources:
                source.backup()
            self.post_source_backup()
            next_path = self.private_data_path
            for filter_ in self.filters:
                next_path = filter_.backup(next_path, self.filter_private_path(filter_), allow_in_place=True)
            info.total_size = self.get_repository_size()
            info.success_count += 1
            info.last_state_valid = True
            info.last_success = datetime.datetime.now()
            info.last_message = 'ok'
        except Exception as e:
            self.print_error('unable to perform backup: %s' % text_type(e))
            info.fail_count += 1
            info.last_fail = datetime.datetime.now()
            info.last_state_valid = False
            info.last_message = text_type(e)
        finally:
            os.chdir(cwd)
            if lock_ is not None:
                try:
                    if self.can_execute_command(''):
                        self.release_lock(lock_)
                except Exception as e:
                    self.print_error('unable to release lock: %s' % text_type(e))
        if self.can_execute_command('# register this backup state'):
            self.set_info(info)
        return info.last_state_valid

    def restore(self):
        next_path = self.private_data_path
        filter_data = []
        for filter_ in self.filters:
            filter_data.append((filter_, next_path))
            next_path = filter_.next_path(next_path, self.filter_private_path(filter_), allow_in_place=True)
        for filter_, next_path in reversed(filter_data):
            filter_.restore(next_path, self.filter_private_path(filter_), allow_in_place=True)
        self.pre_source_restore()
        for source in self.sources:
            source.restore()
        self.post_source_restore()

    def add_source(self, source):
        """
        :param source: source
        :type source: :class:`polyarchiv.sources.Source`
        """
        self.sources.append(source)

    @property
    def import_data_path(self):
        """Must return a valid directory where a source can write its files.
        If the collect point is not the filesystem, any file written in this directory by a source
        must be stored to the collect point's storage.
        """
        raise NotImplementedError

    @property
    def private_data_path(self):
        """where all exported data are actually stored and where the first filter is applied"""
        raise NotImplementedError

    @cached_property
    def export_data_path(self):
        """data exported by the last filter"""
        from polyarchiv.filters import FileFilter
        next_path = self.private_data_path
        for filter_ in self.filters:
            assert isinstance(filter_, FileFilter)
            next_path = filter_.next_path(next_path, self.filter_private_path(filter_), allow_in_place=True)
        return next_path

    @property
    def metadata_path(self):
        raise NotImplementedError

    def pre_source_backup(self):
        """called before the first source backup"""
        pass

    def post_source_backup(self):
        """called after the last source backup"""
        pass

    def pre_source_restore(self):
        """called before the first source restore"""
        pass

    def post_source_restore(self):
        """called after the last source restore"""
        pass

    def get_repository_size(self):
        """return the size of the repository

        :rtype: int (number of bytes)
        """
        raise NotImplementedError

    def get_info(self):
        raise NotImplementedError

    def set_info(self, info):
        raise NotImplementedError

    def get_lock(self):
        """Return a lock object, ensuring that only one instance of this repository is currently running"""
        raise NotImplementedError

    def release_lock(self, lock_):
        """Release the lock object provided by the above method"""
        raise NotImplementedError

    def backup_point_private_path(self, backup_point):
        from polyarchiv.backup_points import BackupPoint
        assert isinstance(backup_point, BackupPoint)
        raise NotImplementedError

    def filter_private_path(self, filter_):
        from polyarchiv.filters import FileFilter
        assert isinstance(filter_, FileFilter)
        raise NotImplementedError

    def execute_hook(self, when, cm, result=None):
        result_ = {self.name: result}
        for hook in self.hooks:
            assert isinstance(hook, Hook)
            if when in hook.hooked_events:
                hook.call(when, cm, result_, {})
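# Sketch (illustrative, not part of the original module): the smallest concrete CollectPoint stores
# everything in a plain local directory. The names below (LocalDirCollectPoint, base_path) are
# hypothetical, and the remaining abstract methods (get_info, set_info, get_lock, release_lock,
# get_repository_size, backup_point_private_path, filter_private_path) are omitted.
#
#   class LocalDirCollectPoint(CollectPoint):
#       def __init__(self, name, base_path='/var/backups/collect', **kwargs):
#           super(LocalDirCollectPoint, self).__init__(name, **kwargs)
#           self.base_path = os.path.join(base_path, name)
#
#       @property
#       def import_data_path(self):
#           return os.path.join(self.base_path, 'data')
#
#       @property
#       def private_data_path(self):
#           return self.import_data_path
#
#       @property
#       def metadata_path(self):
#           return os.path.join(self.base_path, 'metadata')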