Esempio n. 1
0
class Hashsum(FileFilter):
    """Add a new file (default: 'hashes.txt') with the hash of all backuped files."""
    parameters = FileFilter.parameters + [
        Parameter('method',
                  converter=CheckOption(['sha1', 'md5', 'sha256']),
                  help_str='method: sha1, md5 or sha256'),
        Parameter('filename',
                  help_str='index file (default to \'hashes.txt\')'),
    ]
    work_in_place = True

    def __init__(self, name, method='sha1', filename='hashes.txt', **kwargs):
        super(Hashsum, self).__init__(name, **kwargs)
        self.method = method  # hash algorithm name; also a valid hashlib constructor name
        self.filename = filename  # name of the index file written next to the backuped files

    def do_restore(self,
                   previous_path,
                   next_path,
                   private_path,
                   allow_in_place=True):
        """Verify the backuped files against the stored index with the matching '-c' tool."""
        cmd_str = {
            'sha1': 'shasum -a 1 -c',
            'md5': 'md5sum -c',
            'sha256': 'shasum -a 256 -c'
        }[self.method]
        index_path = os.path.abspath(os.path.join(next_path, self.filename))
        cmd = shlex.split(cmd_str) + [index_path]
        self.execute_command(cmd, cwd=next_path)

    def do_backup(self,
                  previous_path,
                  next_path,
                  private_path,
                  allow_in_place=True):
        """Hash every file under `next_path` and write '<digest> *<relpath>' lines to the index.

        The shell command shown to the user (`cmd`) is only cosmetic: hashes are
        actually computed in-process with hashlib.
        """
        cmd = {
            'sha1': 'shasum -a 1 -b',
            'md5': 'md5sum -b',
            'sha256': 'shasum -a 256 -b'
        }[self.method]
        index_path = os.path.abspath(os.path.join(next_path, self.filename))
        # Open the real index only when allowed (dry-run support); otherwise
        # discard output to os.devnull. The original opened os.devnull first and
        # leaked that handle when it was replaced by the index file.
        if self.can_execute_command(['rm', index_path]):
            fd = codecs.open(index_path, 'w', encoding='utf-8')
        else:
            fd = codecs.open(os.devnull, 'w', encoding='utf-8')
        try:
            for root, dirnames, filenames in os.walk(next_path):
                for filename in filenames:
                    src_path = os.path.abspath(os.path.join(root, filename))
                    if src_path == index_path:
                        # never hash the index file itself
                        continue
                    hash_obj = getattr(hashlib, self.method)()
                    with open(src_path, 'rb') as src_fd:
                        # read in 16 KiB chunks to keep memory bounded on large files
                        for data in iter(lambda: src_fd.read(16384), b''):
                            hash_obj.update(data)
                    if self.can_execute_command('%s %s >> %s' %
                                                (cmd, src_path, index_path)):
                        fd.write("%s *%s\n" %
                                 (hash_obj.hexdigest(),
                                  os.path.relpath(src_path, next_path)))
        finally:
            # close even if the walk raises (the original leaked fd on error)
            fd.close()
Esempio n. 2
0
class PostgresSQL(MySQL):
    """Dump the content of a PostgresSQL database with the pg_dump utility to a filename in the collect point.
    Require the 'pg_dump' and 'psql' utilities."""
    parameters = MySQL.parameters[:-2] + [
        Parameter(
            'dump_executable',
            converter=check_executable,
            help_str='path of the pg_dump executable (default: "pg_dump")'),
        Parameter('restore_executable',
                  converter=check_executable,
                  help_str='path of the psql executable (default: "psql")'),
    ]

    def __init__(self, name, collect_point, port='5432',
                 dump_executable='pg_dump', restore_executable='psql',
                 **kwargs):
        # Delegate everything to MySQL, only swapping the defaults for
        # PostgreSQL (port 5432, pg_dump/psql executables).
        super(PostgresSQL, self).__init__(
            name, collect_point, port=port,
            dump_executable=dump_executable,
            restore_executable=restore_executable, **kwargs)

    def get_dump_cmd_list(self):
        """Build the pg_dump command line from the configured connection settings."""
        options = (('--username=%s', self.user),
                   ('--host=%s', self.host),
                   ('--port=%s', self.port))
        # only include the options whose value is actually set
        dump_cmd = [self.dump_executable]
        dump_cmd += [template % value for template, value in options if value]
        dump_cmd.append(self.database)
        return dump_cmd

    def get_env(self):
        """Extra environment variables to be passed to shell execution"""
        return {'PGPASSWORD': self.password} if self.password else {}
Esempio n. 3
0
class Point(ParameterizedObject):
    """Base class for collect/backup points: holds filters, hooks and captured output."""
    parameters = ParameterizedObject.parameters + [
        Parameter(
            'check_out_of_date_backup',
            'frequency',
            converter=get_is_time_elapsed,
            help_str=
            'frequency of backup operations. Can be an integer (number of seconds),\n'
            '"monthly:d" (at least the d-th day of each month, d = 0..28),\n'
            '"weekly:d" (the d-th day of each week, d = 0..6),\n'
            '"weekly" or "daily" (once a week or once a day),\n'
            '"daily:h" (the h-th hour of each day, h = 0..23)'),
    ]

    def __init__(self, name, check_out_of_date_backup=None, **kwargs):
        super(Point, self).__init__(name, **kwargs)
        # default to the "always elapsed" checker when no frequency is given
        self.check_out_of_date_backup = check_out_of_date_backup or get_is_time_elapsed(
            None)
        self.filters = []
        # list of `polyarchiv.filters.FileFilter`
        self.hooks = []
        # list of `polyarchiv.hooks.Hook`

    def add_filter(self, filter_):
        """Register a FileFilter (import deferred to avoid a circular import)."""
        from polyarchiv.filters import FileFilter
        assert isinstance(filter_, FileFilter)
        self.filters.append(filter_)

    def add_hook(self, hook):
        """Register a Hook; start capturing output if the hook requires it."""
        from polyarchiv.hooks import Hook
        assert isinstance(hook, Hook)
        self.hooks.append(hook)
        if hook.keep_output and not self.output_temp_fd:
            self.output_temp_fd = tempfile.TemporaryFile()

    def _output_fd(self):
        """Shared implementation for `stdout` and `stderr` (previously duplicated).

        Returns None at verbosity >= 3 (let the output reach the console),
        the existing capture file when one is set, or a lazily-opened devnull.
        NOTE(review): `verbosity` and `output_temp_fd` are presumably set by
        ParameterizedObject — confirm against the parent class.
        """
        if self.verbosity >= 3 and not self.output_temp_fd:
            return None
        elif self.output_temp_fd:
            return self.output_temp_fd
        self.output_temp_fd = open(os.devnull, 'wb')
        return self.output_temp_fd

    @property
    def stderr(self):
        return self._output_fd()

    @property
    def stdout(self):
        return self._output_fd()
Esempio n. 4
0
class ArchiveRepository(FileRepository):
    """Create an archive (.tar.gz, .tar.xz or .tar.bz2) with files collected from all sources."""

    parameters = FileRepository.parameters + [
        Parameter('archive_name',
                  converter=check_archive,
                  help_str='Name of the created archive, must end by .tar.gz, '
                  '.tar.bz2 or .tar.xz. Default: "archive.tar.gz"[*]')
    ]

    def __init__(self, name, archive_name='archive.tar.gz', **kwargs):
        super(ArchiveRepository, self).__init__(name=name, **kwargs)
        self.archive_name = archive_name  # template; resolved by format_value at use time

    def post_source_backup(self):
        """Pack the collected files into a single tar archive, then drop the work dir."""
        super(ArchiveRepository, self).post_source_backup()
        self.ensure_dir(self.private_data_path)
        comp = 'j'  # tar compression flag: bzip2 by default
        archive_name = self.format_value(self.archive_name)
        if archive_name.endswith('.tar.gz'):
            comp = 'z'
        elif archive_name.endswith('.tar.xz'):
            # bug fix: was 'x', producing 'tar -cxf' which mixes the create and
            # *extract* modes and is rejected by tar; GNU tar's xz flag is -J
            comp = 'J'
        file_list = os.listdir(self.import_data_path)
        full_path = os.path.join(self.private_data_path, archive_name)
        if file_list:
            self.execute_command(['tar', '-c%sf' % comp, full_path] +
                                 file_list,
                                 cwd=self.import_data_path)
        elif self.can_execute_command(['tar', '-c%sf' % comp, full_path]):
            # nothing to archive: still create an empty archive for consistency
            mode = {'j': 'w:bz2', 'J': 'w:xz', 'z': 'w:gz'}[comp]
            tarfile.open(name=full_path, mode=mode).close()
        if self.can_execute_command(['rm', '-rf', self.import_data_path]):
            shutil.rmtree(self.import_data_path)

    @property
    def private_data_path(self):
        # archives live in a dedicated subfolder of the local path
        path = os.path.join(self.local_path, 'archives')
        return self.format_value(path)

    def pre_source_restore(self):
        """Wipe the import dir and re-extract the stored archive into it."""
        archive_name = self.format_value(self.archive_name)
        full_path = os.path.join(self.private_data_path, archive_name)
        path = self.import_data_path
        if (os.path.isdir(path)
                and os.listdir(path)) and self.can_execute_command(
                    ['rm', '-rf', path]):
            shutil.rmtree(path)
        self.ensure_dir(path)
        # tar auto-detects the compression on extraction, so -xf is enough
        self.execute_command(['tar', '-C', path, '-xf', full_path])
Esempio n. 5
0
class LogHook(Hook):
    """store PolyArchiv's output to the given path. Be sure to set `keep_output` to `y`."""
    parameters = Hook.parameters + [
        Parameter('path', required=True, help_str='path of the log file [*]'),
    ]

    def __init__(self, name, runner, path=None, **kwargs):
        super(LogHook, self).__init__(name, runner, **kwargs)
        # destination template; resolved by format_value when the hook fires
        self.path = path

    def call(self, when, cm, collect_point_results, backup_point_results):
        """Write the content captured by the monitor to the configured log file."""
        assert isinstance(cm, FileContentMonitor)
        log_path = self.format_value(self.path)
        with open(log_path, 'wb') as log_fd:
            cm.copy_content(log_fd, close=False)
Esempio n. 6
0
class Config(object):
    """Global command-execution settings and paths of the external executables."""
    parameters = [
        Parameter('rsync_executable',
                  converter=check_executable,
                  help_str='full path of the "rsync" executable'),
        Parameter('curl_executable',
                  converter=check_executable,
                  help_str='full path of the "curl" executable'),
        # consistency fix: __init__ accepts git_executable (and GitRepository
        # uses self.config.git_executable) but it was missing from this list,
        # so it could never be configured
        Parameter('git_executable',
                  converter=check_executable,
                  help_str='full path of the "git" executable'),
        Parameter('scp_executable',
                  converter=check_executable,
                  help_str='full path of the "scp" executable'),
        Parameter('ssh_executable',
                  converter=check_executable,
                  help_str='full path of the "ssh" executable'),
        Parameter('tar_executable',
                  converter=check_executable,
                  help_str='full path of the "tar" executable'),
        Parameter('svn_executable',
                  converter=check_executable,
                  help_str='full path of the "svn" executable'),
    ]

    def __init__(self,
                 command_display=True,
                 command_confirm=False,
                 command_execute=True,
                 command_keep_output=False,
                 rsync_executable='rsync',
                 curl_executable='curl',
                 git_executable='git',
                 scp_executable='scp',
                 ssh_executable='ssh',
                 tar_executable='tar',
                 svn_executable='svn'):
        self.command_display = command_display  # display each command before running it
        self.command_confirm = command_confirm  # ask the user to confirm each command
        self.command_execute = command_execute  # actually run commands (if False: 'dry' mode)
        self.command_keep_output = command_keep_output  # display all command outputs on stderr/stdout
        self.rsync_executable = rsync_executable
        self.curl_executable = curl_executable
        self.git_executable = git_executable
        self.scp_executable = scp_executable
        self.ssh_executable = ssh_executable
        self.tar_executable = tar_executable
        self.svn_executable = svn_executable
Esempio n. 7
0
class SymmetricCrypt(FileFilter):
    """Encrypt all files with symmetric encryption and a password (using GPG).
     The only required parameter is the password.

     Require 'gpg' to be installed and in $PATH.
    """
    parameters = FileFilter.parameters + [
        Parameter('gpg_executable',
                  converter=check_executable,
                  help_str='path of the gpg executable (default: "gpg")'),
        Parameter('password', help_str='password to encrypt data'),
    ]
    work_in_place = False

    def __init__(self,
                 name,
                 password='******',
                 gpg_executable='gpg',
                 **kwargs):
        super(SymmetricCrypt, self).__init__(name, **kwargs)
        # NOTE(security): the password is passed on the gpg command line and is
        # therefore visible in the process list while gpg runs
        self.password = password
        self.gpg_executable = gpg_executable

    def do_backup(self,
                  previous_path,
                  next_path,
                  private_path,
                  allow_in_place=True):
        """Mirror `previous_path` into `next_path`, encrypting each regular file with gpg."""
        symlinks = True
        # start from an empty destination
        if os.listdir(next_path):
            if self.can_execute_command(['rm', '-rf', next_path]):
                shutil.rmtree(next_path)
            if self.can_execute_command(['mkdir', '-p', next_path]):
                os.makedirs(next_path)
        for root, dirnames, filenames in os.walk(previous_path):
            for src_dirname in dirnames:
                clear_path = os.path.join(root, src_dirname)
                crypted_path = os.path.join(
                    next_path, os.path.relpath(clear_path, previous_path))
                if self.can_execute_command(['mkdir', '-p', crypted_path]):
                    os.makedirs(crypted_path)
                    shutil.copystat(clear_path, crypted_path)
            for src_filename in filenames:
                clear_path = os.path.join(root, src_filename)
                crypted_path = os.path.join(
                    next_path, os.path.relpath(clear_path, previous_path))
                if symlinks and os.path.islink(clear_path):
                    # recreate symlinks as-is instead of encrypting their target
                    linkto = os.readlink(clear_path)
                    if self.can_execute_command(
                        ['ln', '-s', linkto, crypted_path]):
                        os.symlink(linkto, crypted_path)
                else:
                    # bug fix: used the literal 'gpg' instead of the configured
                    # gpg_executable, making the parameter dead
                    cmd = [
                        self.gpg_executable, '--passphrase', self.password,
                        '-o', crypted_path, '-c', clear_path
                    ]
                    return_code, __, __ = self.execute_command(
                        cmd, stderr=self.stderr, stdout=self.stdout)
                    if return_code == 0 and os.path.isfile(
                            crypted_path) and os.path.isfile(clear_path):
                        shutil.copystat(clear_path, crypted_path)

    def do_restore(self,
                   previous_path,
                   next_path,
                   private_path,
                   allow_in_place=True):
        """Mirror `next_path` back into `previous_path`, decrypting each file with gpg."""
        symlinks = True
        # start from an empty destination
        if os.listdir(previous_path):
            if self.can_execute_command(['rm', '-rf', previous_path]):
                shutil.rmtree(previous_path)
            if self.can_execute_command(['mkdir', '-p', previous_path]):
                os.makedirs(previous_path)
        for root, dirnames, filenames in os.walk(next_path):
            for src_dirname in dirnames:
                crypted_path = os.path.join(root, src_dirname)
                clear_path = os.path.join(
                    previous_path, os.path.relpath(crypted_path, next_path))
                if self.can_execute_command(['mkdir', '-p', clear_path]):
                    os.makedirs(clear_path)
                    shutil.copystat(crypted_path, clear_path)
            for src_filename in filenames:
                crypted_path = os.path.join(root, src_filename)
                clear_path = os.path.join(
                    previous_path, os.path.relpath(crypted_path, next_path))
                if symlinks and os.path.islink(crypted_path):
                    linkto = os.readlink(crypted_path)
                    if self.can_execute_command(
                        ['ln', '-s', linkto, clear_path]):
                        os.symlink(linkto, clear_path)
                else:
                    # bug fix: same dead-parameter issue as in do_backup
                    cmd = [
                        self.gpg_executable, '--passphrase', self.password,
                        '-o', clear_path, '--decrypt', crypted_path
                    ]
                    return_code, __, __ = self.execute_command(cmd)
                    if return_code == 0 and os.path.isfile(clear_path):
                        shutil.copystat(crypted_path, clear_path)
Esempio n. 8
0
class GitRepository(FileRepository):
    """Create a local git repository. Collect files from all sources and commit them locally.
    """
    parameters = FileRepository.parameters + [
        Parameter(
            'commit_email',
            help_str='user email used for signing commits (default: "%s") [*]'
            % DEFAULT_EMAIL),
        Parameter(
            'commit_name',
            help_str='user name used for signing commits (default: "%s") [*]' %
            DEFAULT_USERNAME),
        Parameter(
            'commit_message',
            help_str=
            'commit message (default: "Backup {Y}/{m}/{d} {H}:{M}") [*]'),
    ]

    def __init__(self,
                 name,
                 commit_name=DEFAULT_USERNAME,
                 commit_email=DEFAULT_EMAIL,
                 commit_message='Backup {Y}/{m}/{d} {H}:{M}',
                 **kwargs):
        super(GitRepository, self).__init__(name=name, **kwargs)
        self.commit_name = commit_name  # template for user.name
        self.commit_email = commit_email  # template for user.email
        self.commit_message = commit_message  # template for the commit message

    def post_source_backup(self):
        """Init (if needed), stage and commit the collected files."""
        super(GitRepository, self).post_source_backup()
        # HOME is redirected to the metadata dir so git reads our .gitconfig
        # instead of the real user's one
        git_env = {'HOME': self.metadata_path}
        git_config_path = os.path.join(self.metadata_path, '.gitconfig')
        if not os.path.isfile(git_config_path):
            self.execute_command([
                self.config.git_executable, 'config', '--global', 'user.email',
                self.format_value(self.commit_email)
            ],
                                 env=git_env)
            self.execute_command([
                self.config.git_executable, 'config', '--global', 'user.name',
                self.format_value(self.commit_name)
            ],
                                 env=git_env)
        os.chdir(self.import_data_path)  # kept for any caller relying on the cwd side effect
        self.execute_command([self.config.git_executable, 'init'],
                             cwd=self.import_data_path,
                             env=git_env)
        # consistency fix: 'add' previously ran with neither cwd nor the HOME
        # override, relying on the implicit chdir and the user's real gitconfig
        self.execute_command([self.config.git_executable, 'add', '.'],
                             cwd=self.import_data_path,
                             env=git_env)
        self.execute_command([
            self.config.git_executable, 'commit', '-am',
            self.format_value(self.commit_message)
        ],
                             cwd=self.import_data_path,
                             ignore_errors=True,  # committing nothing is not an error
                             env=git_env)

    def pre_source_restore(self):
        """Discard any local modification before restoring from the repository."""
        os.chdir(self.import_data_path)
        self.execute_command([self.config.git_executable, 'reset', '--hard'],
                             cwd=self.import_data_path,
                             env={'HOME': self.metadata_path})
        self.execute_command([self.config.git_executable, 'clean', '-f'],
                             cwd=self.import_data_path,
                             env={'HOME': self.metadata_path})
0
class HttpHook(Hook):
    """Perform a HTTP request.

    """
    default_body = ''
    parameters = Hook.parameters + [
        Parameter('url', required=True, help_str='requested URL [*]'),
        Parameter('method', help_str='HTTP method (default to "GET")'),
        Parameter('body', help_str='request body (empty by default) [*]'),
        Parameter('username', help_str='HTTP username [*]'),
        Parameter('password', help_str='HTTP password [*]'),
        Parameter('keyfile', help_str='client PEM key file [*]'),
        Parameter('certfile', help_str='client PEM cert file [*]'),
        Parameter(
            'cafile',
            help_str=
            'CA cert PEM file, or "ignore" to ignore invalid certificates [*]'
        ),
        Parameter('proxy_url', help_str='Proxy URL [*]'),
        Parameter(
            'headers',
            help_str=
            'custom headers, space-separated, e.g. HEADER1=VALUE HEADER2="VA LUE"'
        ),
    ]

    def __init__(self,
                 name,
                 runner,
                 url='',
                 method='GET',
                 body=default_body,
                 username=None,
                 password=None,
                 keyfile=None,
                 certfile=None,
                 cafile=None,
                 proxy_url=None,
                 headers='',
                 **kwargs):
        super(HttpHook, self).__init__(name, runner, **kwargs)
        self.url = url
        self.method = method
        self.body = body
        self.username = username
        self.password = password
        self.keyfile = keyfile
        self.certfile = certfile
        self.cafile = cafile
        self.proxy_url = proxy_url
        self.headers = headers

    def call(self, when, cm, collect_point_results, backup_point_results):
        """Build the requests kwargs from the configured templates and send the request."""
        self.set_extra_variables(cm, collect_point_results,
                                 backup_point_results)
        kwargs = {}
        body = self.format_value(self.body)
        if body:
            kwargs['data'] = body
        # client TLS auth requires both the key and the certificate
        keyfile, certfile = self.format_value(self.keyfile), self.format_value(
            self.certfile)
        if keyfile and certfile:
            kwargs['cert'] = (certfile, keyfile)
        cafile = self.format_value(self.cafile)
        if cafile == 'ignore':
            kwargs['verify'] = False
        elif cafile and os.path.isfile(cafile):
            kwargs['verify'] = cafile
        else:
            kwargs['verify'] = True
        proxy_url = self.format_value(self.proxy_url)
        if proxy_url:
            # bug fix: was kwargs['proxy'], a keyword requests silently ignores
            kwargs['proxies'] = {'http': proxy_url, 'https': proxy_url}
        username = self.format_value(self.username)
        password = self.format_value(self.password)
        if username and password:
            kwargs['auth'] = HTTPBasicAuth(username, password)
        headers = {}
        # headers are given as shell-like 'NAME=VALUE' tokens
        for splitted in shlex.split(self.format_value(self.headers)):
            header_name, sep, header_value = splitted.partition('=')
            if sep == '=':
                headers[header_name] = header_value
        if headers:
            kwargs['headers'] = headers
        url = self.format_value(self.url)
        req = requests.request(self.method, url, **kwargs)
        # bug fix: the condition was inverted ('< 300' reported errors on success)
        if req.status_code >= 300:
            self.print_error('Request %s returned a %d code' %
                             (url, req.status_code))
        req.close()
Esempio n. 10
0
class FileRepository(CollectPoint):
    """Collect files from all sources in the folder 'local_path'.
    """

    parameters = CollectPoint.parameters + [
        Parameter(
            'local_path',
            converter=check_directory,
            required=True,
            help_str='absolute path where all data are locally gathered [*]')
    ]
    METADATA_FOLDER = 'metadata'

    def __init__(self, name, local_path='.', **kwargs):
        super(FileRepository, self).__init__(name=name, **kwargs)
        self.local_path = local_path  # template; resolved by format_value in the properties below

    def pre_source_backup(self):
        """Make sure the destination folder exists before sources write into it."""
        self.ensure_dir(self.import_data_path)

    @cached_property
    def import_data_path(self):
        """Folder where sources drop their data."""
        path = self.format_value(os.path.join(self.local_path, 'backups'))
        return path

    @cached_property
    def private_data_path(self):
        """where all exported data are actually stored"""
        return self.import_data_path

    @cached_property
    def metadata_path(self):
        """Folder for collect-point metadata; created on first access."""
        path = os.path.join(self.local_path, self.METADATA_FOLDER,
                            'collect_point')
        path = self.format_value(path)
        self.ensure_dir(path)
        return path

    @cached_property
    def lock_filepath(self):
        return os.path.join(self.metadata_path, 'lock')

    # NOTE(review): lru_cache on instance methods keeps every instance alive
    # for the cache's lifetime; acceptable here if points live for the whole run
    @lru_cache()
    def backup_point_private_path(self, backup_point):
        path = os.path.join(self.local_path, self.METADATA_FOLDER,
                            'remote-%s' % backup_point.name)
        return self.format_value(path)

    @lru_cache()
    def filter_private_path(self, filter_):
        path = os.path.join(self.local_path, self.METADATA_FOLDER,
                            'filter-%s' % filter_.name)
        return self.format_value(path)

    def get_info(self):
        """Read the persisted PointInfo, or return a fresh one if none was saved."""
        path = os.path.join(self.metadata_path, '%s.json' % self.name)
        self.ensure_dir(path, parent=True)
        if os.path.isfile(path):
            with codecs.open(path, 'r', encoding='utf-8') as fd:
                content = fd.read()
            return PointInfo.from_str(content)
        else:
            return PointInfo()

    def set_info(self, info):
        """Persist the given PointInfo next to the other metadata."""
        assert isinstance(info, PointInfo)
        path = os.path.join(self.metadata_path, '%s.json' % self.name)
        self.ensure_dir(path, parent=True)
        content = info.to_str()
        with codecs.open(path, 'w', encoding='utf-8') as fd:
            fd.write(content)

    def get_lock(self):
        """Acquire the collect-point lock or raise ValueError after one second."""
        self.ensure_dir(self.lock_filepath, parent=True)
        lock_ = Lock(self.lock_filepath)
        if lock_.acquire(timeout=1):
            return lock_
        else:
            self.print_error(
                'Unable to lock collect point. Check if no other backup is currently running or '
                'delete %s' % self.lock_filepath)
            raise ValueError('unable to lock the collect point')

    def get_repository_size(self):
        """Return the total size reported by `du -s` (in blocks), or 0 on parse failure."""
        content = subprocess.check_output(['du', '-s'],
                                          cwd=self.local_path).decode()
        # bug fix: du separates the size and the path with a TAB, so the
        # previous literal-space pattern never matched and always returned 0;
        # also use a raw string to avoid the invalid-escape warning on \d
        matcher = re.match(r'^(\d+)\s+\.$', content.strip())
        if not matcher:
            return 0
        return int(matcher.group(1))

    def release_lock(self, lock_):
        lock_.release()

    def pre_source_restore(self):
        # nothing to prepare: data are read in place
        pass

    def post_source_restore(self):
        # nothing to clean up
        pass
Esempio n. 11
0
class RemoteFiles(Source):
    """copy the remote files from the given server/source_path to the collect point.
    The destination is a folder inside the collect point.
    Require 'rsync'.
    """
    parameters = Source.parameters + [
        Parameter(
            'source_url',
            required=True,
            help_str=
            'synchronize data from this URL. Must ends by a folder name'),
        Parameter('destination_path',
                  help_str='destination folder (like "./remote-files")',
                  required=True),
        Parameter(
            'private_key',
            help_str='private key or certificate associated to \'remote_url\''
        ),
        Parameter('ca_cert',
                  help_str='CA certificate associated to \'remote_url\'. '
                  'Set to "any" for not checking certificates'),
        Parameter('ssh_options', help_str='SSH options associated to \'url\''),
        Parameter(
            'keytab',
            converter=check_file,
            help_str=
            'absolute path of the keytab file (for Kerberos authentication)'),
    ]

    def __init__(self,
                 name,
                 collect_point,
                 source_url='',
                 destination_path='',
                 keytab=None,
                 private_key=None,
                 ca_cert=None,
                 ssh_options=None,
                 **kwargs):
        """
        :param collect_point: collect point where files are stored
        :param source_url: remote folders to add to the collect point
        :param destination_path: relative path of the backup destination (must be a directory name, e.g. "data")
        """
        super(RemoteFiles, self).__init__(name, collect_point, **kwargs)
        self.destination_path = destination_path
        self.source_url = source_url
        self.keytab = keytab
        self.private_key = private_key
        self.ca_cert = ca_cert
        self.ssh_options = ssh_options

    def _local_dirname(self):
        """Absolute destination folder inside the collect point."""
        return os.path.join(self.collect_point.import_data_path,
                            self.destination_path)

    def _get_backend(self):
        """Instantiate the transfer backend for the configured source URL."""
        return get_backend(self.collect_point,
                           self.source_url,
                           keytab=self.keytab,
                           private_key=self.private_key,
                           ca_cert=self.ca_cert,
                           ssh_options=self.ssh_options,
                           config=self.config)

    def backup(self):
        """Pull the remote files into the collect point."""
        self._get_backend().sync_dir_to_local(self._local_dirname())

    def restore(self):
        """Push the collected files back to the remote location."""
        self._get_backend().sync_dir_from_local(self._local_dirname())
Esempio n. 12
0
class SvnRepository(FileRepository):
    """Collect files from all sources in the folder 'local_path' and commit them to a remote SVN repository.
    """

    parameters = FileRepository.parameters + [
        Parameter(
            'remote_url',
            required=True,
            help_str=
            'URL of the remote repository (must exist). Should contain username and password [*]'
        ),
        Parameter('ca_cert',
                  help_str='CA certificate associated to \'remote_url\'. '
                  'Set to "any" for not checking certificates [*]'),
        Parameter(
            'client_cert',
            help_str='Client certificate associated to \'remote_url\' [*]'),
        Parameter('client_cert_password',
                  help_str='Password for encrypted client certificates [*]'),
        Parameter(
            'commit_message',
            help_str=
            'commit message (default: "Backup {Y}/{m}/{d} {H}:{M}") [*]'),
    ]
    checks = FileRepository.checks + [ValidSvnUrl('remote_url')]

    def __init__(self,
                 name,
                 remote_url=None,
                 ca_cert=None,
                 client_cert=None,
                 client_cert_password=None,
                 commit_message='Backup {Y}/{m}/{d} {H}:{M}',
                 **kwargs):
        super(SvnRepository, self).__init__(name=name, **kwargs)
        # credentials embedded in the URL are split out and passed via --username/--password
        remote_url, username, password = url_auth_split(
            self.format_value(remote_url))
        self.username = username
        self.password = password
        self.ca_cert = ca_cert
        self.remote_url = remote_url
        self.client_cert = client_cert
        self.commit_message = commit_message
        self.client_cert_password = client_cert_password

    # NOTE: the redundant release_lock override (identical to the inherited
    # FileRepository.release_lock) was removed

    @cached_property
    def svn_folder(self):
        """Path of the .svn administrative folder of the working copy."""
        return os.path.join(self.import_data_path, '.svn')

    def pre_source_backup(self):
        """Check out the remote repository if no working copy exists yet."""
        if not os.path.isdir(self.svn_folder):
            cmd = [
                self.config.svn_executable,
                'co',
                '--ignore-externals',
                '--force',
            ]
            cmd += self.__svn_parameters()
            cmd += [self.remote_url, self.import_data_path]
            self.execute_command(cmd)

    def post_source_backup(self):
        """Add new files, remove missing ones and commit the working copy."""
        cmd = [self.config.svn_executable, 'status']
        p = subprocess.Popen(cmd,
                             cwd=self.import_data_path,
                             stdout=subprocess.PIPE,
                             stderr=open(os.devnull, 'wb'))
        stdout, stderr = p.communicate()
        to_add = []
        to_remove = []
        for line in stdout.decode('utf-8').splitlines():
            # first column of 'svn status': '?' = untracked, '!' = missing
            matcher = re.match(
                r'^([ ADMRCXI?!~])[ MC][ L][ +][ S][ KOTB][ C] (?P<name>.*)$',
                line)
            if not matcher:
                continue
            status, name = matcher.groups()
            if status == '?':
                to_add.append(name)
            elif status == '!':
                to_remove.append(name)
        if to_add:
            self.execute_command([self.config.svn_executable, 'add'] + to_add,
                                 cwd=self.import_data_path)
        if to_remove:
            self.execute_command(
                [self.config.svn_executable, 'rm', '--force'] + to_remove,
                cwd=self.import_data_path)
        message = self.format_value(self.commit_message)
        cmd = [self.config.svn_executable, 'ci', '-m', message]
        cmd += self.__svn_parameters()
        self.execute_command(cmd, cwd=self.import_data_path)

    def __svn_parameters(self):
        """Common svn options: auth, CA and client-certificate settings."""
        result = ['--non-interactive', '--no-auth-cache']
        if self.username:
            result += ['--username', self.username]
        if self.password:
            result += ['--password', self.password]
        ca_cert = self.format_value(self.ca_cert)
        if ca_cert == 'any':
            # explicitly requested: trust any server certificate
            result += ['--trust-server-cert']
        elif ca_cert:
            result += [
                '--config-option',
                'servers:global:ssl-authority-files=%s' % ca_cert
            ]
        client_cert = self.format_value(self.client_cert)
        if client_cert:
            result += [
                '--config-option',
                'servers:global:ssl-client-cert-file=%s' % client_cert
            ]
        client_cert_password = self.format_value(self.client_cert_password)
        if client_cert_password:
            result += [
                '--config-option',
                'servers:global:ssl-client-cert-password=%s' %
                client_cert_password
            ]
        return result

    def pre_source_restore(self):
        """Ensure a working copy exists, then update it to HEAD, keeping the remote side on conflicts."""
        self.pre_source_backup()
        cmd = [
            self.config.svn_executable,
            'up',
            '-r',
            'HEAD',
            '--ignore-externals',
            '--force',
            '--accept',
            'theirs-conflict',
        ]
        cmd += self.__svn_parameters()
        self.execute_command(cmd, cwd=self.import_data_path)
Esempio n. 13
0
class MySQL(Source):
    """Dump the content of a MySQL database with the mysqldump utility to a filename in the collect point.
    Require the 'mysql' and 'mysqldump' utilities. """
    parameters = Source.parameters + [
        Parameter('host', help_str='database host'),
        Parameter('port', converter=int, help_str='database port'),
        Parameter('sudo_user',
                  help_str='sudo user, used for all SQL operations',
                  converter=check_username),
        Parameter('user', help_str='database user'),
        Parameter('password', help_str='database password'),
        Parameter('database',
                  help_str='name of the backuped database',
                  required=True),
        Parameter(
            'destination_path',
            help_str=
            'relative path of the backup destination (e.g. "database.sql")'),
        Parameter(
            'dump_executable',
            converter=check_executable,
            help_str='path of the mysqldump executable (default: "mysqldump")'
        ),
        Parameter('restore_executable',
                  converter=check_executable,
                  help_str='path of the mysql executable (default: "mysql")'),
    ]

    def __init__(self,
                 name,
                 collect_point,
                 host='localhost',
                 port='3306',
                 user='',
                 password='',
                 database='',
                 destination_path='mysql_dump.sql',
                 sudo_user=None,
                 dump_executable='mysqldump',
                 restore_executable='mysql',
                 **kwargs):
        super(MySQL, self).__init__(name, collect_point, **kwargs)
        # connection settings
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        # executables, privilege escalation and output location
        self.dump_executable = dump_executable
        self.restore_executable = restore_executable
        self.sudo_user = sudo_user
        self.destination_path = destination_path

    def _build_env(self):
        """Return the process environment merged with source-specific
        variables, echoing each extra variable through print_command."""
        env = os.environ.copy()
        env.update(self.get_env())
        for key, value in list(self.get_env().items()):
            self.print_command('%s=%s' % (key, value))
        return env

    def backup(self):
        """Run mysqldump and write its standard output into the collect point.

        In dry-run mode (can_execute_command returns False) the dump is
        still executed, but its output is discarded to /dev/null.
        """
        dump_path = os.path.join(self.collect_point.import_data_path,
                                 self.destination_path)
        self.ensure_dir(dump_path, parent=True)
        cmd = self.get_dump_cmd_list()
        if self.sudo_user:
            cmd = ['sudo', '-u', self.sudo_user] + cmd
        env = self._build_env()
        if not self.can_execute_command(cmd + ['>', dump_path]):
            dump_path = os.devnull  # run the dump even in dry mode
        with open(dump_path, 'wb') as fd:
            process = subprocess.Popen(cmd,
                                       env=env,
                                       stdout=fd,
                                       stderr=self.stderr)
            process.communicate()
        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, cmd[0])

    def restore(self):
        """Feed the previously-dumped SQL file to the mysql client.

        Silently does nothing when no dump file exists (nothing was backed up).
        """
        sql_path = os.path.join(self.collect_point.import_data_path,
                                self.destination_path)
        if not os.path.isfile(sql_path):
            return
        cmd = self.get_restore_cmd_list()
        if self.sudo_user:
            cmd = ['sudo', '-u', self.sudo_user] + cmd
        env = self._build_env()
        # noinspection PyTypeChecker
        with open(sql_path, 'rb') as fd:
            self.execute_command(cmd,
                                 env=env,
                                 stdin=fd,
                                 stderr=self.stderr,
                                 stdout=self.stdout)

    def get_dump_cmd_list(self):
        """ :return:
        :rtype: :class:`list` of :class:`str`
        """
        command = [self.dump_executable]
        # only pass options whose value is non-empty
        for template, value in (('--user=%s', self.user),
                                ('--password=%s', self.password),
                                ('--host=%s', self.host),
                                ('--port=%s', self.port)):
            if value:
                command.append(template % value)
        command.append(self.database)
        return command

    def get_restore_cmd_list(self):
        """ :return:
        :rtype: :class:`list` of :class:`str`
        """
        # same options as the dump, but invoked through the client executable
        command = list(self.get_dump_cmd_list())
        command[0] = self.restore_executable
        return command

    def get_env(self):
        """Extra environment variables to be passed to shell execution"""
        return {}
# Example 14
class Synchronize(CommonBackupPoint):
    """Synchronize the exported data with a remote folder through a storage backend."""
    parameters = CommonBackupPoint.parameters + [
        Parameter(
            'remote_url',
            required=True,
            help_str=
            'synchronize data to this URL. Must ends by a folder name [*]'),
        Parameter(
            'private_key',
            help_str=
            'private key or certificate associated to \'remote_url\' [*]'),
        Parameter('ca_cert',
                  help_str='CA certificate associated to \'remote_url\'. '
                  'Set to "any" for not checking certificates [*]'),
        Parameter('ssh_options',
                  help_str='SSH options associated to \'url\' [*]'),
        Parameter(
            'keytab',
            help_str=
            'absolute path of the keytab file (for Kerberos authentication) [*]'
        ),
    ]
    checks = CommonBackupPoint.checks + [
        AttributeUniquess('remote_url'),
        FileIsReadable('private_key'),
        CaCertificate('ca_cert'),
        FileIsReadable('keytab')
    ]

    def __init__(self,
                 name,
                 remote_url='',
                 keytab=None,
                 private_key=None,
                 ca_cert=None,
                 ssh_options=None,
                 **kwargs):
        super(Synchronize, self).__init__(name, **kwargs)
        # connection/authentication settings, all formattable with [*] variables
        self.remote_url = remote_url
        self.ssh_options = ssh_options
        self.ca_cert = ca_cert
        self.private_key = private_key
        self.keytab = keytab

    def _get_backend(self, collect_point):
        """Instantiate the storage backend for this collect point,
        expanding every [*] parameter against its variables."""
        remote_url = self.format_value(self.remote_url, collect_point)
        options = {
            attr: self.format_value(getattr(self, attr), collect_point)
            for attr in ('keytab', 'private_key', 'ca_cert', 'ssh_options')
        }
        return get_backend(collect_point,
                           remote_url,
                           config=self.config,
                           **options)

    def do_backup(self, collect_point, export_data_path, info):
        """Push the local export directory to the remote location."""
        self._get_backend(collect_point).sync_dir_from_local(export_data_path)

    def do_restore(self, collect_point, export_data_path):
        """Pull the remote content back into the local export directory."""
        self._get_backend(collect_point).sync_dir_to_local(export_data_path)
# Example 15
class GitlabRepository(GitRepository):
    """Use a remote git repository and push local modifications to it.
    If the 'private_key' is set, then git+ssh is used for pushing data.
    Otherwise, use password or kerberos auth with git+http.

    The backup point is automatically created if required using the HTTP API provided by Gitlab.
    """
    parameters = GitRepository.parameters[:-1] + [
        Parameter(
            'gitlab_url',
            help_str=
            'HTTP URL of the gitlab server (e.g.: \'https://mygitlab.example.org/\') [*]',
            required=True),
        Parameter('project_name',
                  help_str=
                  'Name of the Gitlab project (e.g. \'myuser/myproject\')[*]',
                  required=True),
        Parameter(
            'username',
            help_str=
            'Username to use for pushing data. If you use git+ssh, use the SSH username'
            ' (often \'git\'), otherwise use your real username. [*]'),
        Parameter(
            'password',
            help_str=
            'Password for HTTP auth (if private_key and keytab are not set) [*]'
        ),
        Parameter(
            'api_key',
            help_str='API key allowing for creating new repositories [*]',
            required=True),
    ]
    checks = GitRepository.checks + [
        AttributeUniquess('project_name'),
        GitlabProjectName('project_name')
    ]

    def __init__(self,
                 name,
                 gitlab_url='',
                 api_key=None,
                 project_name='',
                 username='',
                 password='',
                 private_key=None,
                 **kwargs):
        parsed = urlparse(gitlab_url)
        if private_key:
            # SSH auth: use the scp-like syntax 'user@host:namespace/project.git'.
            # (The previous form omitted the project path and produced an
            # unusable remote such as 'git@hostname.git'.)
            remote_url = '%s@%s:%s.git' % (username, parsed.hostname,
                                           project_name)
        else:
            # HTTP(S) auth: embed username and password in the URL
            remote_url = '%s://%s:%s@%s/%s.git' % (parsed.scheme, username,
                                                   password, parsed.hostname,
                                                   project_name)
        # noinspection PyTypeChecker
        super(GitlabRepository, self).__init__(name,
                                               private_key=private_key,
                                               remote_url=remote_url,
                                               **kwargs)
        self.api_key = api_key
        self.project_name = project_name
        # base URL of the Gitlab REST API (v3)
        self.api_url = '%s://%s/api/v3' % (parsed.scheme, parsed.hostname)

    def check_remote_url(self, collect_point):
        """Check that the Gitlab project exists, creating it through the API if needed.

        :return: True when the project exists (or was just created)
        :raises ValueError: when the creation request is rejected by the server
        """
        project_name = self.format_value(self.project_name, collect_point)
        api_url = self.format_value(self.api_url, collect_point)
        api_key = self.format_value(self.api_key, collect_point)
        remote_url = self.format_value(self.remote_url, collect_point)
        headers = {'PRIVATE-TOKEN': api_key}
        r = requests.get('%s/projects/%s' %
                         (api_url, quote_plus(project_name)),
                         headers=headers)
        if r.status_code == requests.codes.ok:
            return True
        # project is missing: create it under 'namespace' with name 'name'
        # noinspection PyTypeChecker
        namespace, sep, name = project_name.partition('/')
        data = {'name': name, 'namespace': namespace}
        if self.can_execute_command([
                'curl', '-X', 'POST', '-H',
                'PRIVATE-TOKEN: %s' % api_key,
                '%s/projects/?%s' % (api_url, urlencode(data))
        ]):
            r = requests.post('%s/projects/' % api_url,
                              headers=headers,
                              params=data)
            # 201 Created is the expected success code for a POST; only
            # 3xx/4xx/5xx answers denote a failure (the previous '> 200'
            # check wrongly rejected successful creations).
            if r.status_code >= 300:
                raise ValueError('Unable to create repository %s' % remote_url)
        # GET /projects/:id/events
        return True
# Example 16
class GitRepository(CommonBackupPoint):
    """Use a remote git repository and push local modifications to it.
    Can use https (with password or kerberos auth) or git+ssh remote URLs (with private key authentication).
    local and remote branches are always named 'master'.
    """

    parameters = CommonBackupPoint.parameters + [
        Parameter(
            'keytab',
            help_str=
            'absolute path of the keytab file (for Kerberos authentication) [*]'
        ),
        Parameter(
            'private_key',
            help_str=
            'absolute path of the private key file (for SSH key authentication) [*]'
        ),
        Parameter(
            'commit_email',
            help_str='user email used for signing commits (default: "%s")' %
            DEFAULT_EMAIL),
        Parameter('commit_name',
                  help_str='user name used for signing commits (default: "%s")'
                  % DEFAULT_USERNAME),
        Parameter(
            'commit_message',
            help_str=
            'commit message (default: "Backup {Y}/{m}/{d} {H}:{M}") [*]'),
        Parameter(
            'remote_url',
            help_str=
            'URL of the remote server, including username and password (e.g.: '
            'ssh://[email protected]/project.git, file:///foo/bar/project.git or '
            'https://*****:*****@mygitlab.example.org/username/project.git). '
            'The password is not required for SSH connections (you should use SSH keys).'
            'The backup point must already exists. If you created it by hand, do not '
            'forget to set \'git config --bool core.bare true\'. [*]',
            required=True),
    ]
    checks = CommonBackupPoint.checks + [
        AttributeUniquess('remote_url'),
        FileIsReadable('private_key'),
        FileIsReadable('keytab'),
        Email('commit_email'),
        ValidGitUrl('remote_url')
    ]

    def __init__(self,
                 name,
                 remote_url='',
                 remote_branch='master',
                 private_key=None,
                 keytab=None,
                 commit_name=DEFAULT_USERNAME,
                 commit_email=DEFAULT_EMAIL,
                 commit_message='Backup {Y}/{m}/{d} {H}:{M}',
                 **kwargs):
        super(GitRepository, self).__init__(name, **kwargs)
        self.keytab = keytab
        self.private_key = private_key
        self.remote_url = remote_url
        self.remote_branch = remote_branch
        self.commit_name = commit_name
        self.commit_email = commit_email
        self.commit_message = commit_message

    def do_backup(self, collect_point, export_data_path, info):
        """Commit the exported data into a local git repository and push it.

        The git directory lives in the backup point's private path while the
        work tree is the export directory. Kerberos (k5start) or SSH-key
        (ssh-agent) wrappers are added around the push when configured.
        """
        assert isinstance(collect_point, CollectPoint)  # just to help PyCharm
        worktree = export_data_path
        git_dir = os.path.join(self.private_path(collect_point), 'git')
        # the caller (BackupPoint.backup) restores the working directory afterwards
        os.chdir(worktree)
        git_command = [
            self.config.git_executable, '--git-dir', git_dir, '--work-tree',
            worktree
        ]
        self.execute_command(git_command + ['init'], cwd=worktree)
        # HOME is overridden so '--global' config stays inside git_dir
        self.execute_command([
            self.config.git_executable, 'config', '--global', 'user.email',
            self.commit_email
        ],
                             env={'HOME': git_dir})
        self.execute_command([
            self.config.git_executable, 'config', '--global', 'user.name',
            self.commit_name
        ],
                             env={'HOME': git_dir})
        self.execute_command(git_command + ['add', '.'])
        commit_message = self.format_value(self.commit_message,
                                           collect_point,
                                           check_metadata_requirement=False)
        # commit may legitimately fail when nothing changed, hence ignore_errors
        # noinspection PyTypeChecker
        self.execute_command(git_command + ['commit', '-am', commit_message],
                             ignore_errors=True,
                             env={'HOME': git_dir})

        remote_url = self.format_value(self.remote_url, collect_point)
        if not self.check_remote_url(collect_point):
            raise ValueError('Invalid backup point: %s' % remote_url)
        cmd = []
        if self.keytab:
            keytab = self.format_value(self.keytab,
                                       collect_point,
                                       check_metadata_requirement=False)
            cmd += ['k5start', '-q', '-f', keytab, '-U', '--']
        cmd += git_command + ['push', remote_url, 'master:master']
        # noinspection PyTypeChecker
        if self.private_key and not remote_url.startswith('http'):
            private_key = self.format_value(self.private_key,
                                            collect_point,
                                            check_metadata_requirement=False)
            cmd = [
                'ssh-agent', 'bash', '-c',
                'ssh-add %s ; %s' % (private_key, ' '.join(cmd))
            ]
        self.execute_command(cmd, cwd=worktree, env={'HOME': git_dir})

    def check_remote_url(self, collect_point):
        """Hook for subclasses: return True when the remote repository is usable."""
        return True

    def do_restore(self, collect_point, export_data_path):
        """Restore the collect point data by cloning the remote repository.

        Both the private git directory and the work tree are wiped first,
        then recreated by 'git clone --separate-git-dir'.
        """
        assert isinstance(collect_point, CollectPoint)  # just to help PyCharm
        worktree = export_data_path
        git_dir = os.path.join(self.private_path(collect_point), 'git')
        self.ensure_dir(git_dir, parent=True)
        self.ensure_absent(git_dir)
        self.ensure_dir(worktree, parent=True)
        self.ensure_absent(worktree)
        remote_url = self.format_value(self.remote_url, collect_point)
        cmd = [
            self.config.git_executable, 'clone', '--separate-git-dir', git_dir,
            remote_url, worktree
        ]
        if self.keytab:
            keytab = self.format_value(self.keytab,
                                       collect_point,
                                       check_metadata_requirement=False)
            # k5start must wrap the git command (prepended, as in do_backup);
            # the previous code appended it after the clone arguments, which
            # git would have rejected as extra positional arguments.
            cmd = ['k5start', '-q', '-f', keytab, '-U', '--'] + cmd
        if self.private_key and not remote_url.startswith('http'):
            private_key = self.format_value(self.private_key,
                                            collect_point,
                                            check_metadata_requirement=False)
            cmd = [
                'ssh-agent', 'bash', '-c',
                'ssh-add %s ; %s' % (private_key, ' '.join(cmd))
            ]
        self.execute_command(cmd, cwd=os.path.dirname(worktree))
# Example 17
class BackupPoint(Point):
    """Base class for backup destinations.

    A backup point stores the data gathered by one or more collect points;
    collect points are associated to it through tag matching
    (included/excluded_collect_point_tags). Subclasses implement
    :meth:`do_backup` and :meth:`do_restore`.
    """
    # variables whose values only depend on constants (host/time), safe to
    # re-use at restore time
    constant_format_values = base_variables(use_constants=True)
    parameters = Point.parameters + [
        Parameter(
            'backup_point_tags',
            converter=strip_split,
            help_str=
            'list of tags (comma-separated) associated to this backup point (default: "backup")'
        ),
        Parameter(
            'included_collect_point_tags',
            converter=strip_split,
            help_str=
            'any collect point with one of these tags (comma-separated) will be associated '
            'to this backup point. You can use ? or * as jokers in these tags.'
        ),
        Parameter(
            'excluded_collect_point_tags',
            converter=strip_split,
            help_str=
            'any collect point with one of these tags (comma-separated) will not be associated'
            ' to this backup point. You can use ? or * as jokers in these tags. Have precedence over '
            'included_collect_point_tags and included_backup_point_tags.'),
    ]
    checks = []

    # list of callable(runner, backup_point, collect_points)

    def __init__(self,
                 name,
                 backup_point_tags=None,
                 included_collect_point_tags=None,
                 excluded_collect_point_tags=None,
                 **kwargs):
        """Store tag lists, applying defaults ('backup' tag, include-all '*')."""
        super(BackupPoint, self).__init__(name, **kwargs)
        self.backup_point_tags = [
            'backup'
        ] if backup_point_tags is None else backup_point_tags
        self.included_collect_point_tags = [
            '*'
        ] if included_collect_point_tags is None else included_collect_point_tags
        self.excluded_collect_point_tags = excluded_collect_point_tags or []
        self.collect_point_variables = {}
        # values specific to a collect_point: self.collect_point_variables[collect_point.name][key] = value
        # used to override remote parameters

    def format_value(self, value, collect_point, use_constant_values=False):
        """Expand '{variable}' placeholders in *value*.

        Variables come from this backup point, the collect point, and the
        per-collect-point overrides (in increasing priority). When
        use_constant_values is True, constant host/time values override all.

        :returns: the formatted string, or None when *value* is None
        :raises ValueError: when *value* references an unknown variable
        """
        if value is None:
            return None
        assert isinstance(collect_point, CollectPoint)
        variables = {}
        variables.update(self.variables)
        variables.update(collect_point.variables)
        if collect_point.name in self.collect_point_variables:
            variables.update(self.collect_point_variables[collect_point.name])
        if use_constant_values:
            variables.update(self.constant_format_values)
        try:
            formatted_value = value.format(**variables)
        except KeyError as e:
            # str(KeyError) looks like "KeyError: 'name'"; keep only the name
            txt = text_type(e)[len('KeyError:'):]
            raise ValueError(
                'Unable to format \'%s\': variable %s is missing' %
                (value, txt))
        return formatted_value

    def backup(self, collect_point, force=False):
        """Perform the backup of one collect point and log all errors.

        Skips the backup when the previous one is still valid (unless
        *force* is set). Acquires the collect point lock around the actual
        transfer, records success/failure in the PointInfo, and always
        persists that state afterwards.

        :returns: True when the backup is valid (done or still fresh)
        """
        self.print_info('backup point %s of collect point %s' %
                        (self.name, collect_point.name))
        info = self.get_info(collect_point)
        assert isinstance(info, PointInfo)
        assert isinstance(collect_point, CollectPoint)
        out_of_date = self.check_out_of_date_backup(
            current_time=datetime.datetime.now(),
            previous_time=info.last_success)
        if not (force or out_of_date):
            # the last previous backup is still valid
            # => nothing to do
            self.print_success(
                'last backup (%s) is still valid. No backup to do.' %
                info.last_success)
            return True
        elif info.last_success is None:
            self.print_info('no previous backup: a new backup is required.')
        elif out_of_date:
            self.print_info('last backup (%s) is out-of-date.' %
                            str(info.last_success))
        elif force:
            self.print_info(
                'last backup (%s) is still valid but a new backup is forced.' %
                str(info.last_success))
        lock_ = None
        # collect only (but all) variables that are related to host and time
        info.variables = {
            k: v
            for (k, v) in list(collect_point.variables.items())
            if k in self.constant_format_values
        }
        # these variables are required for a valid restore
        cwd = os.getcwd()
        try:
            if self.can_execute_command('# get lock'):
                lock_ = collect_point.get_lock()
            export_data_path = self.apply_backup_filters(collect_point)
            self.do_backup(collect_point, export_data_path, info)
            info.success_count += 1
            info.last_state_valid = True
            info.last_success = datetime.datetime.now()
            info.last_message = 'ok'
        except Exception as e:
            self.print_error('unable to perform backup: %s' % text_type(e))
            info.fail_count += 1
            info.last_fail = datetime.datetime.now()
            info.last_state_valid = False
            info.last_message = text_type(e)
        finally:
            # do_backup implementations may os.chdir(); always restore the cwd
            os.chdir(cwd)
        if lock_ is not None:
            try:
                if self.can_execute_command('# release lock'):
                    collect_point.release_lock(lock_)
            except Exception as e:
                self.print_error('unable to release lock. %s' % text_type(e))
        if self.can_execute_command('# register this backup point state'):
            self.set_info(collect_point, info)
        return info.last_state_valid

    def do_backup(self, collect_point, export_data_path, info):
        """send backup data from the collect point
        :param collect_point: the collect point
        :param export_data_path: where all data are stored (path)
        :param info: PointInfo object. its attribute `data` can be freely updated
        """
        raise NotImplementedError

    def apply_backup_filters(self, collect_point):
        """Run every file filter over the collect point's export data.

        :returns: the path of the data after the last filter has been applied
        """
        assert isinstance(collect_point, CollectPoint)
        next_path = collect_point.export_data_path
        for filter_ in self.filters:
            assert isinstance(filter_, FileFilter)
            next_path = filter_.backup(next_path,
                                       self.filter_private_path(
                                           collect_point, filter_),
                                       allow_in_place=False)
        return next_path

    def apply_restore_filters(self, collect_point):
        """Undo the file filters, in reverse order of their application.

        First walks forward to recompute each filter's input path, then
        calls restore() from the last filter back to the first.
        """
        assert isinstance(collect_point, CollectPoint)
        next_path = collect_point.export_data_path
        filter_data = []
        for filter_ in self.filters:
            assert isinstance(filter_, FileFilter)
            filter_data.append((filter_, next_path))
            next_path = filter_.next_path(next_path,
                                          self.filter_private_path(
                                              collect_point, filter_),
                                          allow_in_place=False)
        for filter_, next_path in reversed(filter_data):
            assert isinstance(filter_, FileFilter)
            filter_.restore(next_path,
                            self.filter_private_path(collect_point, filter_),
                            allow_in_place=False)

    # noinspection PyMethodMayBeStatic
    def get_info(self, collect_point, force_backup=False):
        """Load this backup point's saved state for *collect_point*.

        :returns: the stored PointInfo, or a fresh one when none exists
        """
        assert isinstance(collect_point, CollectPoint)
        path = os.path.join(self.private_path(collect_point),
                            '%s.json' % self.name)
        if os.path.isfile(path):
            with codecs.open(path, 'r', encoding='utf-8') as fd:
                content = fd.read()
            return PointInfo.from_str(content)
        else:
            return PointInfo()

    # noinspection PyMethodMayBeStatic
    def set_info(self, collect_point, info):
        """Persist *info* as JSON in this backup point's private path."""
        assert isinstance(collect_point, CollectPoint)
        assert isinstance(info, PointInfo)
        path = os.path.join(self.private_path(collect_point),
                            '%s.json' % self.name)
        self.ensure_dir(path, parent=True)
        content = info.to_str()
        with codecs.open(path, 'w', encoding='utf-8') as fd:
            fd.write(content)

    def restore(self, collect_point):
        """Restore the collect point data: fetch it, then undo the filters."""
        info = self.get_info(collect_point, force_backup=True)
        assert isinstance(collect_point, CollectPoint)
        assert isinstance(info, PointInfo)
        # re-inject the host/time variables recorded at backup time
        collect_point.variables.update(info.variables)
        next_path = collect_point.export_data_path
        for filter_ in self.filters:
            assert isinstance(filter_, FileFilter)
            next_path = filter_.next_path(next_path,
                                          self.filter_private_path(
                                              collect_point, filter_),
                                          allow_in_place=False)
        self.do_restore(collect_point, next_path)
        self.apply_restore_filters(collect_point)

    def do_restore(self, collect_point, export_data_path):
        """Fetch the backup data into *export_data_path* (subclass hook)."""
        raise NotImplementedError

    @lru_cache()
    def private_path(self, collect_point):
        """Return the 'remote' private directory for this (point, collect point) pair."""
        assert isinstance(collect_point, CollectPoint)
        return os.path.join(collect_point.backup_point_private_path(self),
                            'remote')

    @lru_cache()
    def filter_private_path(self, collect_point, filter_):
        """Return the private directory reserved to one filter of this point."""
        assert isinstance(collect_point, CollectPoint)
        assert isinstance(filter_, FileFilter)
        return os.path.join(collect_point.backup_point_private_path(self),
                            'filter-%s' % filter_.name)

    def execute_hook(self, when, cm, collect_point, result=None):
        """Invoke every hook registered for the *when* event with *result*."""
        assert isinstance(collect_point, CollectPoint)
        result_ = {(self.name, collect_point.name): result}
        for hook in self.hooks:
            assert isinstance(hook, Hook)
            if when in hook.hooked_events:
                hook.call(when, cm, {collect_point.name: True}, result_)
# Example 18
class CommonBackupPoint(BackupPoint):
    """A BackupPoint with meaningful implementations pour set_info/get_info"""
    parameters = BackupPoint.parameters + [
        Parameter(
            'metadata_url',
            required=False,
            help_str=
            'send metadata (about the successful last backup) to this URL.'
            'Should end by "/" or use the {name} variable [**]'),
        Parameter('metadata_private_key',
                  help_str='private key associated to \'metadata_url\' [**]'),
        Parameter(
            'metadata_ca_cert',
            help_str='private certificate associated to \'metadata_url\' [**]'
        ),
        Parameter(
            'metadata_keytab',
            help_str=
            'keytab (for Kerberos authentication) associated to \'metadata_url\' [**]'
        ),
        Parameter('metadata_ssh_options',
                  help_str='SSH options associated to \'metadata_url\' [**]'),
    ]
    checks = BackupPoint.checks + [
        AttributeUniquess('metadata_url'),
        FileIsReadable('metadata_private_key'),
        FileIsReadable('metadata_keytab'),
        CaCertificate('metadata_ca_cert')
    ]

    def __init__(self,
                 name,
                 metadata_url=None,
                 metadata_private_key=None,
                 metadata_ca_cert=None,
                 metadata_keytab=None,
                 metadata_ssh_options=None,
                 **kwargs):
        super(CommonBackupPoint, self).__init__(name, **kwargs)
        self.metadata_url = metadata_url
        self.metadata_private_key = metadata_private_key
        self.metadata_ca_cert = metadata_ca_cert
        self.metadata_keytab = metadata_keytab
        self.metadata_ssh_options = metadata_ssh_options
        self.metadata_url_requirements = []
        # list of values using non-constant values

    def format_value(self,
                     value,
                     collect_point,
                     use_constant_values=False,
                     check_metadata_requirement=True):
        """Check if the metadata_url is required: at least one formatted value uses non-constant values"""
        if use_constant_values:
            return super(CommonBackupPoint,
                         self).format_value(value, collect_point,
                                            use_constant_values)
        result = super(CommonBackupPoint,
                       self).format_value(value, collect_point, False)
        if check_metadata_requirement:
            # format a second time with constant values only: a difference
            # means the value depends on time/host variables
            constant_result = super(CommonBackupPoint,
                                    self).format_value(value, collect_point,
                                                       True)
            if constant_result != result:
                self.metadata_url_requirements.append(value)
        return result

    def do_restore(self, collect_point, export_data_path):
        raise NotImplementedError

    def do_backup(self, collect_point, export_data_path, info):
        raise NotImplementedError

    def _get_metadata_backend(self, collect_point):
        """Build the storage backend used for the metadata file.

        :returns: a StorageBackend, or None when 'metadata_url' is unset
            (a warning is printed when it should have been set)
        """
        assert isinstance(collect_point, CollectPoint)
        if self.metadata_url is None:
            # grammar helpers: plural -> 'values ... use', singular -> 'value ... uses'
            p1 = 's' if len(self.metadata_url_requirements) > 1 else ''
            p2 = '' if len(self.metadata_url_requirements) > 1 else 's'
            if self.metadata_url_requirements:
                self.print_error(
                    'value%s "%s" use%s time/host-dependent variables. '
                    'You should define the "metadata_url" parameter to ease restore operation'
                    % (p1, ', '.join(self.metadata_url_requirements), p2))
            return None
        metadata_url = self.format_value(self.metadata_url,
                                         collect_point,
                                         use_constant_values=True)
        if metadata_url.endswith('/'):
            metadata_url += '%s.json' % collect_point.name
        metadata_private_key = self.format_value(self.metadata_private_key,
                                                 collect_point,
                                                 use_constant_values=True)
        metadata_ca_cert = self.format_value(self.metadata_ca_cert,
                                             collect_point,
                                             use_constant_values=True)
        metadata_keytab = self.format_value(self.metadata_keytab,
                                            collect_point,
                                            use_constant_values=True)
        metadata_ssh_options = self.format_value(self.metadata_ssh_options,
                                                 collect_point,
                                                 use_constant_values=True)
        backend = get_backend(self,
                              metadata_url,
                              keytab=metadata_keytab,
                              private_key=metadata_private_key,
                              ca_cert=metadata_ca_cert,
                              ssh_options=metadata_ssh_options,
                              config=self.config)
        assert isinstance(backend, StorageBackend)
        return backend

    # NOTE(review): lru_cache on an instance method keeps every
    # (self, collect_point) pair alive for the process lifetime; presumably
    # acceptable for this tool's short runs -- confirm before reusing elsewhere.
    @lru_cache()
    def get_info(self, collect_point, force_backup=False):
        """Load the backup state, fetching it from the metadata backend when
        the local copy is missing or a fresh copy is forced."""
        assert isinstance(collect_point, CollectPoint)
        path = os.path.join(self.private_path(collect_point),
                            '%s.json' % self.name)
        if not os.path.isfile(path) or force_backup:
            self.ensure_dir(path, parent=True)
            backend = self._get_metadata_backend(collect_point)
            if backend is not None:
                # best-effort download; narrowed from a bare 'except:' so that
                # KeyboardInterrupt/SystemExit are no longer swallowed
                try:
                    backend.sync_file_to_local(path)
                except Exception:  # happens on the first sync (no remote data available)
                    pass
        if os.path.isfile(path) and not force_backup:
            with codecs.open(path, 'r', encoding='utf-8') as fd:
                content = fd.read()
            return PointInfo.from_str(content)
        return PointInfo()

    def set_info(self, collect_point, info):
        """Persist the backup state locally and mirror it to the metadata backend."""
        assert isinstance(collect_point, CollectPoint)
        assert isinstance(info, PointInfo)
        path = os.path.join(self.private_path(collect_point),
                            '%s.json' % self.name)
        self.ensure_dir(path, parent=True)
        content = info.to_str()
        with codecs.open(path, 'w', encoding='utf-8') as fd:
            fd.write(content)
        backend = self._get_metadata_backend(collect_point)
        if backend is not None:
            backend.sync_file_from_local(path)
# Example 19
class TarArchive(CommonBackupPoint):
    """Gather all files of your collect point into a .tar archive (.tar.gz, .tar.bz2 or .tar.xz) and copy it to
    the remote URL.
    """

    excluded_files = {'.git', '.gitignore'}
    # archive suffix -> tar compression flag (used by do_backup)
    _tar_flags = {'.tar.gz': '-czf', '.tar.bz2': '-cjf', '.tar.xz': '-cJf'}
    parameters = CommonBackupPoint.parameters + [
        Parameter(
            'remote_url',
            required=True,
            help_str=
            'synchronize data to this URL, like \'ssh://user@hostname/folder/archive.tar.gz\'. '
            'Must end by ".tar.gz", "tar.bz2", "tar.xz" [*]'),
        Parameter(
            'private_key',
            help_str=
            'private key or certificate associated to \'remote_url\' [*]'),
        Parameter('ca_cert',
                  help_str='CA certificate associated to \'remote_url\'. '
                  'Set to "any" for not checking certificates [*]'),
        Parameter('ssh_options',
                  help_str='SSH options associated to \'url\' [*]'),
        Parameter(
            'keytab',
            help_str=
            'absolute path of the keytab file (for Kerberos authentication) [*]'
        ),
    ]
    checks = CommonBackupPoint.checks + [
        AttributeUniquess('remote_url'),
        FileIsReadable('private_key'),
        CaCertificate('ca_cert'),
        FileIsReadable('keytab')
    ]

    def __init__(self,
                 name,
                 remote_url='',
                 keytab=None,
                 private_key=None,
                 ca_cert=None,
                 ssh_options=None,
                 **kwargs):
        """Store the formatted-later connection settings (all values may
        contain {variables} expanded per collect point)."""
        super(TarArchive, self).__init__(name, **kwargs)
        self.remote_url = remote_url
        self.keytab = keytab
        self.private_key = private_key
        self.ca_cert = ca_cert
        self.ssh_options = ssh_options

    @staticmethod
    def _archive_suffix(remote_url):
        """Return '.tar.gz', '.tar.bz2' or '.tar.xz' according to `remote_url`.

        Raises ValueError for an unsupported format. The historical, slightly
        lax check is kept: a leading dot is not required in the URL.
        """
        for suffix in ('.tar.gz', '.tar.bz2', '.tar.xz'):
            if remote_url.endswith(suffix[1:]):
                return suffix
        raise ValueError('invalid tar format: %s' % remote_url)

    def _get_backend(self, collect_point):
        """Build the storage backend for this collect point, expanding the
        per-point variables in every connection setting."""
        remote_url = self.format_value(self.remote_url, collect_point)
        keytab = self.format_value(self.keytab, collect_point)
        private_key = self.format_value(self.private_key, collect_point)
        ca_cert = self.format_value(self.ca_cert, collect_point)
        ssh_options = self.format_value(self.ssh_options, collect_point)
        backend = get_backend(collect_point,
                              remote_url,
                              keytab=keytab,
                              private_key=private_key,
                              ca_cert=ca_cert,
                              ssh_options=ssh_options,
                              config=self.config)
        return backend

    def do_backup(self, collect_point, export_data_path, info):
        """Create the archive from `export_data_path` and upload it.

        The local archive is always removed afterwards; any failure (tar
        returncode != 0, or an upload exception) is re-raised once the
        cleanup is done.
        """
        assert isinstance(collect_point, CollectPoint)
        backend = self._get_backend(collect_point)
        remote_url = self.format_value(self.remote_url, collect_point)
        # single place deciding the format (was duplicated with do_restore)
        suffix = self._archive_suffix(remote_url)
        archive_filename = self.archive_name_prefix(collect_point) + suffix
        cmd = [self.config.tar_executable, self._tar_flags[suffix],
               archive_filename]
        filenames = os.listdir(export_data_path)
        filenames.sort()  # deterministic member order in the archive
        cmd += filenames
        returncode, stdout, stderr = self.execute_command(cmd,
                                                          cwd=export_data_path,
                                                          ignore_errors=True)
        error = None
        if returncode != 0:
            error = ValueError('unable to create archive %s' %
                               archive_filename)
        else:
            try:
                backend.sync_file_from_local(archive_filename)
            except Exception as e:
                error = e
        self.ensure_absent(archive_filename)
        if error is not None:
            raise error

    def archive_name_prefix(self, collect_point):
        """Local path (without the format suffix) of the temporary archive."""
        return os.path.join(self.private_path(collect_point), 'archive')

    def do_restore(self, collect_point, export_data_path):
        """Download the archive and extract it into `export_data_path`."""
        assert isinstance(collect_point, CollectPoint)
        backend = self._get_backend(collect_point)
        remote_url = self.format_value(self.remote_url, collect_point)
        archive_filename = (self.archive_name_prefix(collect_point) +
                            self._archive_suffix(remote_url))
        backend.sync_file_to_local(archive_filename)
        self.ensure_dir(export_data_path)
        self.execute_command([
            self.config.tar_executable, '-C', export_data_path, '-xf',
            archive_filename
        ])
# Esempio n. 20 (scraper artifact — not part of the module)
class Ldap(Source):
    """Dump an OpenLDAP database using slapcat to a filename in the collect point.
    Must be run on the LDAP server with a sudoer account (or 'root'). Require the 'slapcat' and 'slapadd' utilities. """
    parameters = Source.parameters + [
        Parameter('destination_path',
                  help_str='filename of the dump (not an absolute path)'),
        Parameter('use_sudo',
                  help_str='use sudo to perform the dump (yes/no)',
                  converter=bool_setting),
        Parameter(
            'data_directory',
            help_str='your LDAP base (if you want to restrict the dump)'),
        Parameter(
            'ldap_base',
            help_str='your LDAP base dn (if you want to restrict the dump)'),
        Parameter('database',
                  help_str='database number (default: 1)',
                  converter=int),
        Parameter(
            'dump_executable',
            converter=check_executable,
            help_str='path of the slapcat executable (default: "slapcat")'),
        Parameter(
            'restore_executable',
            converter=check_executable,
            help_str='path of the slapadd executable (default: "slapadd")'),
    ]

    def __init__(self,
                 name,
                 collect_point,
                 destination_path='ldap.ldif',
                 dump_executable='slapcat',
                 use_sudo=False,
                 restore_executable='slapadd',
                 database=1,
                 ldap_base=None,
                 **kwargs):
        """Store the dump/restore settings for this LDAP source."""
        super(Ldap, self).__init__(name, collect_point, **kwargs)
        self.destination_path = destination_path
        self.dump_executable = dump_executable
        self.restore_executable = restore_executable
        self.use_sudo = use_sudo
        self.ldap_base = ldap_base
        self.database = database

    def backup(self):
        """Dump the LDAP database to `destination_path` in the collect point."""
        filename = os.path.join(self.collect_point.import_data_path,
                                self.destination_path)
        self.ensure_dir(filename, parent=True)
        cmd = []
        if self.use_sudo:
            cmd += ['sudo']
        cmd += [self.dump_executable]
        if self.ldap_base:
            cmd += ['-b', self.ldap_base]
        cmd += ['-n', str(self.database)]
        # NOTE(review): this call appears to run the dump a first time and
        # discard its output; the Popen below runs it again. Confirm whether
        # this line is only kept for command logging/dry-run accounting.
        self.execute_command(cmd)
        if not self.can_execute_command(cmd + ['>', filename]):
            filename = os.devnull  # run the dump even in dry mode
        with open(filename, 'wb') as fd:
            p = subprocess.Popen(cmd, stdout=fd, stderr=self.stderr)
            p.communicate()

    def restore(self):
        """Stop slapd, wipe the database folder, reload it from the dump with
        slapadd, restore ownership and restart slapd."""
        filename = os.path.join(self.collect_point.import_data_path,
                                self.destination_path)
        if not os.path.isfile(filename):
            return
        prefix = []
        if self.use_sudo:
            prefix += ['sudo']
        # identify the database folder from the cn=config dump (database 0)
        p = subprocess.Popen(prefix + [self.dump_executable, '-n', '0'],
                             stdout=subprocess.PIPE,
                             stderr=self.stderr)
        stdout, __ = p.communicate()
        database_folder = self.get_database_folder(io.BytesIO(stdout),
                                                   str(self.database))
        if database_folder is None:
            raise IOError('Unable to find database folder for database %s' %
                          self.database)
        # remember the folder's owner so it can be restored after slapadd
        stat_info = os.stat(database_folder)
        uid = stat_info.st_uid
        gid = stat_info.st_gid
        user = pwd.getpwuid(uid)[0]
        group = grp.getgrgid(gid)[0]

        self.execute_command(prefix + ['service', 'slapd', 'stop'])
        self.execute_command(prefix + ['rm', '-rf', database_folder])
        self.execute_command(prefix + ['mkdir', '-p', database_folder])
        self.execute_command(prefix + [
            self.restore_executable,
            '-l',
            filename,
        ])
        self.execute_command(
            prefix + ['chown', '-R',
                      '%s:%s' % (user, group), database_folder])
        self.execute_command(prefix + ['service', 'slapd', 'start'])

    @staticmethod
    def get_database_folder(ldif_config, database_number):
        """Return the olcDbDirectory of database `database_number` from a
        'cn=config' LDIF dump, or None when it cannot be found."""
        parser = LDIFParser(ldif_config)
        # raw string: '\{' is an invalid escape sequence in a plain literal
        # (SyntaxWarning on recent Python versions)
        regexp = re.compile(r'^olcDatabase=\{%s\}(.*),cn=config$' %
                            database_number)
        for dn, entry in parser.parse():
            if not regexp.match(dn):
                continue
            return entry.get('olcDbDirectory', [None])[0]
        return None
# Esempio n. 21 (scraper artifact — not part of the module)
class RollingTarArchive(TarArchive):
    """Gather all files of your collect point into a .tar archive (.tar.gz, .tar.bz2 or .tar.xz) and copy it to the
     remote URL.

    Also tracks previous archives to only keep a given number of hourly/daily/weekly/yearly backups,
    deleting unneeded ones.

    """

    parameters = TarArchive.parameters + [
        Parameter('hourly_count',
                  converter=int,
                  default_str_value='0',
                  help_str='Number of hourly backups to keep (default to 0)'),
        Parameter('daily_count',
                  converter=int,
                  default_str_value='30',
                  help_str='Number of daily backups to keep (default to 30)'),
        Parameter('weekly_count',
                  converter=int,
                  default_str_value='100',
                  help_str='Number of weekly backups to keep '
                  '(default to 100)'),
        # NOTE(review): several displayed defaults do not match the __init__
        # defaults below (hourly 0 vs 1, weekly 100 vs 10, yearly 200 vs 20) —
        # confirm which values are authoritative.
        Parameter('yearly_count',
                  converter=int,
                  default_str_value='200',
                  help_str='Number of yearly backups to keep (default to 20)'),
    ]
    # replace the inherited 'remote_url' parameter: here the URL should embed
    # date/time variables so that each backup gets a distinct archive name
    for index, parameter in enumerate(parameters):
        if parameter.arg_name == 'remote_url':
            parameters[index] = Parameter(
                'remote_url',
                required=True,
                help_str=
                'synchronize data to this URL (SHOULD DEPEND ON THE DATE AND TIME): '
                '\'file:///var/backup/archive-{Y}-{m}-{d}_{H}-{M}.tar.gz\''
                'Must end by ".tar.gz", "tar.bz2", "tar.xz" [*]')
            break

    def __init__(self,
                 name,
                 hourly_count=1,
                 daily_count=30,
                 weekly_count=10,
                 yearly_count=20,
                 **kwargs):
        """Store retention counts (0 disables the corresponding window)."""
        super(RollingTarArchive, self).__init__(name, **kwargs)
        self.hourly_count = hourly_count
        self.daily_count = daily_count
        self.weekly_count = weekly_count
        self.yearly_count = yearly_count

    def do_backup(self, collect_point, export_data_path, info):
        """Create/upload the archive, then prune outdated remote archives.

        `info.data` keeps one dict of date/time variables per past backup; it
        is used to decide which archives fall outside every retention window
        (hourly/daily/weekly/yearly) and must be deleted remotely.
        """
        super(RollingTarArchive, self).do_backup(collect_point,
                                                 export_data_path, info)
        if info.data is None:
            info.data = []
            # info.data must be a list of dict (old values)
        info.data.append(info.variables)
        if self.can_execute_command('# register this backup point state'):
            info.last_state_valid = True
            info.last_success = datetime.datetime.now()
            self.set_info(collect_point, info)
        # ok, there we have to check which old backup must be removed
        values = []
        time_to_values = {}
        # noinspection PyTypeChecker
        for value_dict in info.data:
            d = datetime.datetime(year=int(value_dict['Y']),
                                  month=int(value_dict['m']),
                                  day=int(value_dict['d']),
                                  hour=int(value_dict['H']),
                                  minute=int(value_dict['M']),
                                  second=int(value_dict['S']))
            values.append(d)
            time_to_values[d] = value_dict
        values.sort(reverse=True)
        # newest first; a True value means "this archive is kept"
        times = OrderedDict()
        for d in values:
            times[d] = False
        now = datetime.datetime.now()
        if self.hourly_count:
            times = self.set_accepted_times(
                datetime.timedelta(hours=1),
                times,
                not_before_time=now -
                datetime.timedelta(hours=self.hourly_count))
        if self.daily_count:
            times = self.set_accepted_times(
                datetime.timedelta(days=1),
                times,
                not_before_time=now -
                datetime.timedelta(days=self.daily_count))
        if self.weekly_count:
            times = self.set_accepted_times(
                datetime.timedelta(days=7),
                times,
                not_before_time=now -
                datetime.timedelta(days=self.weekly_count * 7))
        if self.yearly_count:
            times = self.set_accepted_times(
                datetime.timedelta(days=365),
                times,
                not_before_time=now -
                datetime.timedelta(days=self.yearly_count * 365))
        to_remove_values = [d for (d, v) in list(times.items()) if not v]
        to_keep_values = [d for (d, v) in list(times.items()) if v]
        # NOTE(review): the pruned list is not re-saved with set_info here, so
        # it is only persisted by the next successful backup — confirm intended.
        info.data = [time_to_values[d] for d in reversed(to_keep_values)]
        for data in to_remove_values:
            collect_point.variables = time_to_values[data]
            backend = self._get_backend(collect_point)
            backend.delete_on_distant()

    @staticmethod
    def set_accepted_times(min_accept_interval,
                           ordered_times,
                           not_before_time=None,
                           not_after_time=None):
        """ 'require at least one `True` value in `ordered_times` each `min_accept_interval` until `max_checked_time`.
        :param min_accept_interval: at least one True value is required in this interval
        :param ordered_times: is an OrderedDict with datetime keys and boolean values.
        :param not_before_time: any key smaller than it is ignored
        :param not_after_time: any key greater than it is ignored

        >>> times = OrderedDict()
        >>> times[0] = False
        >>> times[3] = False
        >>> times[4] = False
        >>> times[5] = False
        >>> times[7] = False
        >>> times[8] = False
        >>> times[9] = False
        >>> result = RollingTarArchive.set_accepted_times(3, times, not_after_time=14)
        >>> print(result)
        OrderedDict([(0, True), (3, True), (4, False), (5, False), (7, True), (8, False), (9, False)])

         """
        assert isinstance(ordered_times, OrderedDict)
        previous_time = None
        result = OrderedDict()
        for current_time, state in list(ordered_times.items()):
            if not_before_time is not None and current_time < not_before_time:
                result[current_time] = state
            elif not_after_time is not None and current_time > not_after_time:
                result[current_time] = state
            elif previous_time is None:
                # the first in-range entry is always kept
                result[current_time] = True
            elif abs(previous_time - current_time) >= min_accept_interval:
                result[current_time] = True
            else:
                result[current_time] = state
            if result[current_time]:
                previous_time = current_time
        return result

    def archive_name_prefix(self, collect_point):
        """Local archive path prefix; embeds the date so names do not clash."""
        archive_name = self.format_value('archive-{Y}-{m}-{d}_{H}-{M}',
                                         collect_point)
        return os.path.join(self.private_path(collect_point), archive_name)
# Esempio n. 22 (scraper artifact — not part of the module)
class EmailHook(Hook):
    """Send an email to one or more recipient when the hook is called.
    Some extra variables are available:

        * "status" ('--' for "before_backup" hooks, 'OK' or 'KO' otherwise) ,
        * "detailed_status" (one 'KO'/'OK' per line, for each backup or collect point, empty for "before_backup" hooks)
        * "complete_log" (the complete stdout log).
    """
    default_content = "{detailed_status}\n\n{complete_log}"
    default_subject = "[BACKUP][{fqdn}] {Y}/{m}/{d} {H}:{M} [{status}]"
    parameters = Hook.parameters + [
        Parameter('recipient',
                  required=True,
                  help_str='recipients, separated by commas [*]'),
        Parameter('subject',
                  help_str='subject (default to "%s") [*]' % default_subject),
        Parameter(
            'content',
            help_str='mail content (default to "%s") [*]' % default_content),
        Parameter('sender',
                  help_str='from address (default to %s) [*]' % DEFAULT_EMAIL),
        Parameter('hostname',
                  help_str='SMTP server name (default to "localhost")'),
        Parameter('port', help_str='SMTP server port', converter=int),
        Parameter('username', help_str='SMTP client username'),
        Parameter('password', help_str='SMTP client password'),
        Parameter('keyfile', help_str='client PEM key file'),
        Parameter('certfile', help_str='client PEM cert file'),
        Parameter('encryption',
                  help_str='Encryption method ("none", "starttls" or "tls")',
                  converter=CheckOption(["none", "starttls", "tls"])),
    ]

    def __init__(self,
                 name,
                 runner,
                 recipient='',
                 subject=default_subject,
                 content=default_content,
                 sender=DEFAULT_EMAIL,
                 hostname='localhost',
                 port=0,
                 username=None,
                 password=None,
                 keyfile=None,
                 certfile=None,
                 encryption="none",
                 **kwargs):
        """Store the SMTP connection settings and the message template."""
        super(EmailHook, self).__init__(name, runner, **kwargs)
        self.recipient = recipient
        self.subject = subject
        self.content = content
        self.sender = sender
        self.hostname = hostname
        self.port = port
        self.username = username
        self.password = password
        self.keyfile = keyfile
        self.certfile = certfile
        self.encryption = encryption

    def _open_connection(self):
        """Open the SMTP connection, applying the configured encryption."""
        if self.encryption == 'tls':
            # implicit TLS from the first byte
            connection = smtplib.SMTP_SSL(host=self.hostname,
                                          port=self.port,
                                          keyfile=self.keyfile,
                                          certfile=self.certfile)
        else:
            connection = smtplib.SMTP(host=self.hostname, port=self.port)
        if self.encryption == 'starttls':
            # plain connection upgraded after the STARTTLS command
            connection.starttls(keyfile=self.keyfile, certfile=self.certfile)
        return connection

    def call(self, when, cm, collect_point_results, backup_point_results):
        """Format the subject/content templates and send the notification."""
        self.set_extra_variables(cm, collect_point_results,
                                 backup_point_results)
        message = MIMEText(self.format_value(self.content))
        message['Subject'] = self.format_value(self.subject)
        message['From'] = self.format_value(self.sender)
        message['To'] = self.format_value(self.recipient)
        smtp = self._open_connection()
        if self.username and self.password:
            smtp.login(self.username, self.password)
        smtp.send_message(message)
        smtp.quit()
# Esempio n. 23 (scraper artifact — not part of the module)
class Dovecot(Source):
    """Backup (and restore) Dovecot mailboxes into a folder of the collect point,
    using 'doveadm backup'. Require the 'doveadm' utility."""
    parameters = Source.parameters + [
        Parameter('destination_path',
                  help_str='dirname of the dump (not an absolute path)'),
        Parameter('mailbox', help_str='only sync this mailbox name'),
        Parameter(
            'socket',
            help_str=
            'The option\'s argument is either an absolute path to a local UNIX domain socket,'
            ' or a hostname and port (hostname:port), in order to connect a remote host via a'
            ' TCP socket.'),
        Parameter(
            'user_mask',
            help_str='only sync this user ("*" and "?" wildcards can be used).'
        ),
        Parameter(
            'dump_executable',
            converter=check_executable,
            help_str='path of the doveadm executable (default: "doveadm")'),
    ]

    def __init__(self,
                 name,
                 collect_point,
                 destination_path='dovecot',
                 dump_executable='doveadm',
                 mailbox=None,
                 user_mask=None,
                 socket=None,
                 **kwargs):
        """Store the doveadm settings for this mail source."""
        super(Dovecot, self).__init__(name, collect_point, **kwargs)
        self.socket = socket
        self.destination_path = destination_path
        self.dump_executable = dump_executable
        self.mailbox = mailbox
        self.user_mask = user_mask

    def backup(self):
        """Sync the mailboxes into the collect point."""
        self.perform_action(restore=False)

    def restore(self):
        """Sync the collect point data back into the mailboxes."""
        self.perform_action(restore=True)

    def perform_action(self, restore):
        """Run 'doveadm backup' against <import_data_path>/<destination_path>,
        with '-R' (reverse) when restoring."""
        dirname = os.path.join(self.collect_point.import_data_path,
                               self.destination_path)
        self.ensure_dir(dirname)
        cmd = [self.dump_executable, 'backup']
        if restore:
            cmd.append('-R')
        if self.mailbox:
            cmd.extend(['-m', self.mailbox])
        if self.socket:
            cmd.extend(['-S', self.socket])
        if self.user_mask is None:
            cmd.append('-A')  # all users
        else:
            cmd.extend(['-u', self.user_mask])
        cmd.append(dirname)
        self.execute_command(cmd)
# Esempio n. 24 (scraper artifact — not part of the module)
class Hook(ParameterizedObject):
    """Base class for hooks triggered around a backup run.

    Subclasses implement `call`; `hooked_events` lists the event names
    ("before_backup", "backup_success", "backup_error", "after_backup")
    that trigger this hook.
    """
    parameters = ParameterizedObject.parameters + [
        Parameter(
            'events',
            converter=strip_split,
            required=True,
            help_str=
            'list of events (comma-separated) that trigger this hook: "before_backup",'
            '"backup_success", "backup_error", "after_backup".'),
    ]
    keep_output = True

    def __init__(self,
                 name,
                 runner,
                 parameterized_object,
                 events=None,
                 **kwargs):
        super(Hook, self).__init__(name, **kwargs)
        assert isinstance(parameterized_object, ParameterizedObject)
        self.runner = runner
        self.parameterized_object = parameterized_object
        # 'events' is declared required above, so None only happens on misuse
        # (set(None) would raise TypeError)
        self.hooked_events = set(events)

    # NOTE(review): stderr/stdout merely proxy the runner's attributes and
    # look like they may have been intended as properties — confirm callers
    # expect a method here.
    def stderr(self):
        return self.runner.stderr

    def stdout(self):
        return self.runner.stdout

    def print_message(self, *args, **kwargs):
        """Proxy to the runner's print_message."""
        return self.runner.print_message(*args, **kwargs)

    def call(self, when, cm, collect_point_results, backup_point_results):
        """Trigger the hook; must be implemented by subclasses.

        :param when: event name (text)
        :param cm: FileContentMonitor holding the complete log
        :param collect_point_results: dict[collect_point.name] = True/False
        :param backup_point_results: dict[(backup_point.name, collect_point.name)] = True/False
        """
        assert isinstance(when, text_type)
        assert isinstance(cm, FileContentMonitor)
        assert isinstance(collect_point_results,
                          dict)  # dict[collect_point.name] = True/False
        assert isinstance(
            backup_point_results,
            dict)  # dict[(backup_point.name, collect_point.name)] = True/False
        raise NotImplementedError

    def set_extra_variables(self, cm, collect_point_results,
                            backup_point_results):
        """Fill self.variables with 'status', 'detailed_status' and
        'complete_log' computed from the run results."""
        self.variables.update(self.parameterized_object.variables)
        assert isinstance(cm, FileContentMonitor)
        content = cm.get_text_content()
        if not collect_point_results and not backup_point_results:
            self.variables['status'] = '--'
        elif all(collect_point_results.values()) and all(
                backup_point_results.values()):
            self.variables['status'] = 'OK'
        else:
            self.variables['status'] = 'KO'
        text_values = {True: 'OK', False: 'KO'}
        # bugfix: index text_values with the boolean result, not the dict key
        # (the key is a name or a (backup, collect) tuple -> KeyError before)
        detailed_status = [
            '%s: %s' % (text_values[collect_point_results[key]], key)
            for key in sorted(collect_point_results)
        ]
        detailed_status += [
            '%s: %s on %s' % (text_values[backup_point_results[key]], key[0],
                              key[1])
            for key in sorted(backup_point_results)
        ]
        self.variables.update({
            'complete_log': content,
            'detailed_status': '\n'.join(detailed_status)
        })
# Esempio n. 25 (scraper artifact — not part of the module)
class LocalFiles(Source):
    """copy all files from the given source_path to the collect point using 'rsync'.
    The destination is a folder inside the collect point.
    """
    parameters = Source.parameters + [
        Parameter('source_path',
                  converter=check_directory,
                  help_str='original folder to backup',
                  required=True),
        Parameter(
            'destination_path',
            help_str='destination folder (relative path, e.g. "./files")',
            required=True),
        Parameter(
            'exclude',
            help_str=
            'exclude files matching PATTERN (see --exclude option from rsync). '
            'If PATTERN startswith @, then it should be the absolute path of a file '
            '(see --exclude-from option from rsync)'),
        Parameter(
            'include',
            help_str=
            'only include files matching PATTERN (see --include option from rsync). '
            'If PATTERN startswith @, then it should be the absolute path of a file '
            '(see --include-from option from rsync)'),
        Parameter('preserve_hard_links',
                  converter=bool_setting,
                  help_str='true|false: preserve hard links'),
    ]

    def __init__(self,
                 name,
                 collect_point,
                 source_path='',
                 destination_path='',
                 exclude='',
                 include='',
                 preserve_hard_links='',
                 **kwargs):
        """
        :param collect_point: collect point where files are stored
        :param source_path: absolute path of a directory to backup
        :param destination_path: relative path of the backup destination (must be a directory name, e.g. "data")
        :param exclude: exclude files matching PATTERN. If PATTERN starts with '@', it must be the absolute path of
            a file (cf. the --exclude-from option from rsync)
        :param include: don't exclude files matching PATTERN. If PATTERN starts with '@', it must be the absolute path
            of a file (cf. the --include-from option from rsync)
        :param preserve_hard_links: preserve hard links (bool, or a string like "yes"/"true"/"on"/"1")
        """
        super(LocalFiles, self).__init__(name, collect_point, **kwargs)
        self.source_path = source_path
        self.destination_path = destination_path
        self.exclude = exclude
        self.include = include
        # robustness: the declared converter (bool_setting) may already yield
        # a bool; only parse string-like values (previously a non-string
        # raised AttributeError on .lower())
        if hasattr(preserve_hard_links, 'lower'):
            preserve_hard_links = preserve_hard_links.lower().strip() in (
                'yes', 'true', 'on', '1')
        self.preserve_hard_links = bool(preserve_hard_links)

    def backup(self):
        """rsync source_path into <import_data_path>/<destination_path>."""
        cmd = [
            self.config.rsync_executable,
            '-a',
            '--delete',
            '-S',
        ]
        if self.preserve_hard_links:
            cmd.append('-H')
        # noinspection PyTypeChecker
        if self.exclude and self.exclude.startswith('@'):
            cmd += ['--exclude-from', self.exclude[1:]]
        elif self.exclude:
            cmd += ['--exclude', self.exclude]
        # noinspection PyTypeChecker
        if self.include and self.include.startswith('@'):
            cmd += ['--include-from', self.include[1:]]
        elif self.include:
            cmd += ['--include', self.include]
        dirname = os.path.join(self.collect_point.import_data_path,
                               self.destination_path)
        self.ensure_dir(dirname)
        source = self.source_path
        # trailing separators: rsync copies directory *contents*, not the dir
        if not source.endswith(os.path.sep):
            source += os.path.sep
        if not dirname.endswith(os.path.sep):
            dirname += os.path.sep
        cmd += [source, dirname]
        self.execute_command(cmd)

    def restore(self):
        """rsync the collected copy back onto source_path."""
        cmd = [
            self.config.rsync_executable,
            '-a',
            '--delete',
            '-S',
        ]
        if self.preserve_hard_links:
            cmd.append('-H')
        dirname = os.path.join(self.collect_point.import_data_path,
                               self.destination_path)
        source = self.source_path
        self.ensure_dir(dirname)
        self.ensure_dir(source)
        if not source.endswith(os.path.sep):
            source += os.path.sep
        if not dirname.endswith(os.path.sep):
            dirname += os.path.sep
        cmd += [dirname, source]
        self.execute_command(cmd)
# Esempio n. 26 (scraper artifact — not part of the module)
class CollectPoint(Point):
    """Collect point, made of one or more sources.

    Each source is run and contributes files to this collect point's storage.
    Backup points are associated to it through the tag parameters below.
    """
    # configuration options accepted on top of those inherited from Point
    parameters = Point.parameters + [
        Parameter(
            'collect_point_tags',
            converter=strip_split,
            help_str=
            'list of tags (comma-separated) associated to this collect point. Default: "collect"'
        ),
        Parameter(
            'included_backup_point_tags',
            converter=strip_split,
            help_str=
            'any backup point with one of these tags (comma-separated) will be associated '
            'to this local repo. You can use ? or * as jokers in these tags. Default: "*"'
        ),
        Parameter(
            'excluded_backup_point_tags',
            converter=strip_split,
            help_str=
            'any backup point with one of these tags (comma-separated) will not be associated'
            ' to this local repo. You can use ? or * as jokers in these tags. Have precedence over '
            'included_collect_point_tags and included_backup_point_tags.'),
    ]
    checks = []

    # list of callable(runner, collect_point, backup_points)
    def __init__(self,
                 name,
                 collect_point_tags=None,
                 included_backup_point_tags=None,
                 excluded_backup_point_tags=None,
                 **kwargs):
        """Store the tag configuration and start with an empty source list.

        ``None`` tag lists fall back to the defaults documented in
        ``parameters`` ('collect' and '*' respectively).
        """
        super(CollectPoint, self).__init__(name=name, **kwargs)
        if collect_point_tags is None:
            collect_point_tags = ['collect']
        if included_backup_point_tags is None:
            included_backup_point_tags = ['*']
        self.collect_point_tags = collect_point_tags
        self.included_backup_point_tags = included_backup_point_tags
        self.excluded_backup_point_tags = excluded_backup_point_tags or []
        self.sources = []

    def backup(self, force=False):
        """ perform the backup and log all errors

        Skips the run entirely when the previous backup is still valid
        (unless *force* is set). Any exception raised by a source or a
        filter is caught and recorded in the point info instead of
        propagating.

        :param force: run the backup even if the last one is still valid
        :return: True when this run succeeded (or the previous one is still valid)
        """
        self.print_info('backup of collect point %s' % self.name)
        info = self.get_info()
        assert isinstance(info, PointInfo)
        out_of_date = self.check_out_of_date_backup(
            current_time=datetime.datetime.now(),
            previous_time=info.last_success)
        if not (force or out_of_date):
            # the last previous backup is still valid
            # => nothing to do
            self.print_success(
                'last backup (%s) is still valid. No backup to do.' %
                info.last_success)
            return True
        elif info.last_success is None:
            self.print_info('no previous backup: a new backup is required.')
        elif out_of_date:
            self.print_info('last backup (%s) is out-of-date.' %
                            str(info.last_success))
        elif force:
            self.print_info(
                'last backup (%s) is still valid but a new backup is forced.' %
                str(info.last_success))
        lock_ = None
        # remember the working directory: sources or filters may chdir()
        cwd = os.getcwd()
        try:
            if self.can_execute_command(''):
                lock_ = self.get_lock()
            self.pre_source_backup()
            for source in self.sources:
                source.backup()
            self.post_source_backup()

            # run the filter chain: each filter consumes the previous output path
            next_path = self.private_data_path
            for filter_ in self.filters:
                next_path = filter_.backup(next_path,
                                           self.filter_private_path(filter_),
                                           allow_in_place=True)

            # record a successful run in the point info
            info.total_size = self.get_repository_size()
            info.success_count += 1
            info.last_state_valid = True
            info.last_success = datetime.datetime.now()
            info.last_message = 'ok'
        except Exception as e:
            # record the failure in the point info instead of propagating
            self.print_error('unable to perform backup: %s' % text_type(e))
            info.fail_count += 1
            info.last_fail = datetime.datetime.now()
            info.last_state_valid = False
            info.last_message = text_type(e)
        finally:
            os.chdir(cwd)

        # release the lock even after a failure, logging (not raising) problems
        if lock_ is not None:
            try:
                if self.can_execute_command(''):
                    self.release_lock(lock_)
            except Exception as e:
                self.print_error('unable to release lock. %s' % text_type(e))
        if self.can_execute_command('# register this backup state'):
            self.set_info(info)
        return info.last_state_valid

    def restore(self):
        """Undo the filter chain (last filter first), then restore every source."""
        # First walk the chain forward to record the input path of each filter.
        path = self.private_data_path
        chain = []
        for current_filter in self.filters:
            chain.append((current_filter, path))
            path = current_filter.next_path(path,
                                            self.filter_private_path(current_filter),
                                            allow_in_place=True)
        # Then restore each filter in reverse order, from the path it read.
        for current_filter, input_path in reversed(chain):
            current_filter.restore(input_path,
                                   self.filter_private_path(current_filter),
                                   allow_in_place=True)

        self.pre_source_restore()
        for source in self.sources:
            source.restore()
        self.post_source_restore()

    def add_source(self, source):
        """Attach a new source to this collect point.

        :param source: source
        :type source: :class:`polyarchiv.sources.Source`
        """
        self.sources += [source]

    @property
    def import_data_path(self):
        """Must return a valid directory where a source can write its files.
        If the collect point is not the filesystem, any file written in this directory by a source must be stored
        to the collect point's storage.

        :raise NotImplementedError: concrete subclasses must override this
        """
        raise NotImplementedError

    @property
    def private_data_path(self):
        """Where all exported data are actually stored and where the first filter is applied.

        :raise NotImplementedError: concrete subclasses must override this
        """
        raise NotImplementedError

    @cached_property
    def export_data_path(self):
        """Path of the data produced by the last filter of the chain.

        Walks the whole filter chain without running it, asking each filter
        where its output would land. Cached after the first computation.
        """
        from polyarchiv.filters import FileFilter
        path = self.private_data_path
        for current_filter in self.filters:
            assert isinstance(current_filter, FileFilter)
            private = self.filter_private_path(current_filter)
            path = current_filter.next_path(path, private, allow_in_place=True)
        return path

    @property
    def metadata_path(self):
        """Location of this point's metadata (abstract; provided by subclasses)."""
        raise NotImplementedError

    def pre_source_backup(self):
        """called before the first source backup"""
        pass

    def post_source_backup(self):
        """called after the last source backup"""
        pass

    def pre_source_restore(self):
        """called before the first source restore"""
        pass

    def post_source_restore(self):
        """called after the last source restore"""
        pass

    def get_repository_size(self):
        """ return the size of the repository (in bytes)
        :return: total size of the stored data, in bytes
        :rtype: int
        """
        raise NotImplementedError

    def get_info(self):
        # abstract: return the PointInfo describing the last runs of this point
        raise NotImplementedError

    def set_info(self, info):
        # abstract: persist the given PointInfo
        raise NotImplementedError

    def get_lock(self):
        """Return a lock object, ensuring that only one instance of this repository is currently running"""
        raise NotImplementedError

    def release_lock(self, lock_):
        """Release the lock object provided by the above method"""
        raise NotImplementedError

    def backup_point_private_path(self, backup_point):
        # abstract: private working directory reserved for the given backup point
        from polyarchiv.backup_points import BackupPoint
        assert isinstance(backup_point, BackupPoint)
        raise NotImplementedError

    def filter_private_path(self, filter_):
        # abstract: private working directory reserved for the given filter
        from polyarchiv.filters import FileFilter
        assert isinstance(filter_, FileFilter)
        raise NotImplementedError

    def execute_hook(self, when, cm, result=None):
        """Call every registered hook that listens for the *when* event.

        :param when: name of the event that just happened
        :param cm: opaque object forwarded to each hook -- NOTE(review): its
            exact type is not visible here; confirm against Hook.call
        :param result: outcome of this point, forwarded keyed by point name
        """
        result_ = {self.name: result}
        for hook in self.hooks:
            assert isinstance(hook, Hook)
            if when in hook.hooked_events:
                hook.call(when, cm, result_, {})