Example #1
0
class RsyncJob(Job):
    """
    Class containing a rsync job

    :param log_dir: Log directory path
    :type log_dir: string
    :param destination: Destination path
    :type destination: string
    :param name: Job name
    :type name: string
    :param source: Source path
    :type source: string
    :param period: Min duration between two backups (in seconds)
    :type period: float
    :param snapshot: Activate (True) or deactivate (False) snapshots, or do a simple copy (None)
    :type snapshot: bool or None
    :param duration: How many days snapshots are kept
    :type duration: int
    :param keep: How many snapshots are (at least) kept
    :type keep: int
    :param force: Override the time-based check (no minimum duration between backups).
    :type force: bool
    :param guid: (uid, gid) for destination
    :type guid: tuple
    :param filter: Rsync filters
    :type filter: list


    .. note::

        Source and destination path can be either real path
        or a ssh login joined to the path by a : character.

        If uid or gid is None, the owner of the files is not changed.
    """

    def __init__(self, log_dir, destination, name, source, period, snapshot, duration, keep, force, guid, filter):
        """
        Store the job settings and set up the two loggers used by the job.

        :param log_dir: directory in which ``<name>.log`` is written
        :param destination: destination path, wrapped in a ``Target``
        :param name: job name (also used as the name of the per-job logger)
        :param source: source path, wrapped in a ``Target``
        :param period: stored as ``self.period``
        :param snapshot: True, False or None (see ``_prepare_destination``)
        :param duration: passed as ``days`` to ``_delete_old_files``
        :param keep: passed as ``keep`` to ``_delete_old_files``
        :param force: run the backup even if ``_check_need_backup`` says no
        :param guid: ``(uid, gid)`` pair, unpacked into ``dest_uid``/``dest_gid``
        :param filter: iterable of rsync filter rules (may be empty/None)
        """
        self.name = name
        self.source = Target(source)
        self.destination = Target(destination)
        self.period = period
        self.snapshot = snapshot
        self.duration = duration
        self.keep = keep
        self.filter = filter

        self.force = force
        # A single timestamp is taken here and reused for the log banner
        # and for the dated backup directory name.
        self.now = datetime.datetime.now()
        self.current_date = self.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")

        self.dest_uid, self.dest_gid = guid

        self.backup_log_dir = log_dir

        self.logger = logging.getLogger('Vitalus.RsyncJob')

        # Logs specific to the rsync job
        job_log = os.path.join(self.backup_log_dir, self.name + '.log')
        self.job_logger = logging.getLogger(self.name)

        # logging.getLogger() returns the same logger object for a given
        # name, so a second RsyncJob with the same name would otherwise stack
        # another handler on it (duplicated lines, one more open file).
        job_log_abs = os.path.abspath(job_log)
        already_attached = any(
            isinstance(handler, logging.handlers.TimedRotatingFileHandler)
            and handler.baseFilename == job_log_abs
            for handler in self.job_logger.handlers
        )
        if not already_attached:
            log_rotator = logging.handlers.TimedRotatingFileHandler(job_log,
                                                                    when='midnight',
                                                                    interval=1,
                                                                    backupCount=30,
                                                                    encoding=None,
                                                                    delay=False,
                                                                    utc=False)
            self.job_logger.addHandler(log_rotator)
        self.job_logger.setLevel(logging.INFO)

        # Set previous and current backup paths
        self.previous_backup_path = None  # will be detected later
        self.current_backup_path = None

#    def _check_disk_usage(self):
#        """
#        Check the disk usage
#        :raises TARGETError: if low disk space
#        """
#        if self.destination.is_local():
#            #TODO, change the criterion
#            pass
#            #if psutil.disk_usage(self.destination)[2] < utils.get_folder_size(self.source):
#            #    self.logger.critical("Low disk space: %s", self.destination)
#            #    raise TARGETError('Low disk space on %s' % self.destination)
#        elif self.destination.is_ssh():
#            #TODO
#            pass

    def _delete_old_files(self, days=10, keep=10):
        """
        Delete old archives in the destination

        The entries of ``<destination>/<name>`` are listed (locally with
        ``os.listdir``, remotely with ``ssh ... ls -1``), handed to
        ``utils.get_older_files`` and the entries it selects are removed.
        Nothing is done when the destination is neither local nor SSH.

        :param days: delete files older than this value
        :type days: int
        :param keep: keep at least this amount of archives
        :type keep: int
        """
        #TODO : review logs

        path = os.path.join(self.destination.path, self.name)

        self.destination.check_availability()
        if self.destination.is_local():
            filenames = os.listdir(path)
        elif self.destination.is_ssh():
            command = ['ssh', '-t', self.destination.login, 'ls', '-1', path]
            self.logger.debug('SSH ls command: %s', command)
            process = subprocess.Popen(command, bufsize=4096, stdout=subprocess.PIPE)
            stdout, _ = process.communicate()
            # Lines may carry a trailing '\r' (ssh is run with -t).  Strip it
            # *before* dropping blanks so a bare '\r' line does not turn into
            # an empty file name.
            stripped = (line.strip('\r') for line in stdout.decode().split('\n'))
            filenames = [line for line in stripped if line != '']
        else:
            return

        to_delete = utils.get_older_files(filenames, days, keep)
        self.logger.debug("Backups available %s ", filenames)
        self.logger.debug("Backups to delete %s ", to_delete)

        self.destination.check_availability()
        if self.destination.is_local():
            for element in to_delete:
                target = os.path.join(path, element)
                self.logger.debug("Remove backup %s", element)
                try:
                    shutil.rmtree(target)
                except OSError:
                    # Message now matches the mode actually applied below
                    self.logger.debug("Could not delete %s, try to chmod 664", target)
                    utils.r_chmod(target, 0o664)
                    try:
                        # try again
                        shutil.rmtree(target)
                    except OSError:
                        self.logger.error("Impossible to delete %s (symlink?)", target)
        elif self.destination.is_ssh():
            filepaths = [os.path.join(path, element) for element in to_delete]
            if filepaths:
                # NOTE(review): arguments reach the remote side unquoted;
                # paths containing spaces would need care - confirm.
                command = ['ssh', '-t', self.destination.login, 'rm', '-rf']
                command.extend(filepaths)
                self.logger.debug('SSH rm command: %s', command)
                process = subprocess.Popen(command, bufsize=4096, stdout=subprocess.PIPE)
                process.communicate()
                if process.returncode != 0:
                    self.logger.error('SSH rm command exited with status %s', process.returncode)

    def _get_last_backup(self):
        """
        Get the last backup path
        Return None if not available

        The entries of ``<destination>/<name>`` are listed and
        ``utils.get_last_file`` picks the last one.  For an SSH destination
        the directory is first created with ``mkdir -p``.  None is returned
        when a local directory does not exist, when the destination is neither
        local nor SSH, or when ``utils.get_last_file`` returns None.

        :returns: string or None
        """
        path = os.path.join(self.destination.path, self.name)
        self.destination.check_availability()
        if self.destination.is_local():
            if not os.path.isdir(path):
                return None
            filenames = os.listdir(path)
        elif self.destination.is_ssh():
            #First, create at least the target if does not exists
            command = ['ssh', '-t', self.destination.login, 'mkdir', '-p', path]
            self.logger.debug('SSH mkdir command: %s', command)
            process = subprocess.Popen(command, bufsize=4096, stdout=subprocess.PIPE)
            stdout, _ = process.communicate()
            self.logger.debug('SSH mkdir result: %s', stdout.decode())

            command = ['ssh', '-t', self.destination.login, 'ls', '-1', path]
            self.logger.debug('SSH ls command: %s', command)
            process = subprocess.Popen(command, bufsize=4096, stdout=subprocess.PIPE)
            stdout, _ = process.communicate()
            # Strip the trailing '\r' first, then drop blanks, so that a bare
            # '\r' line is not kept as an empty file name.
            stripped = (line.strip('\r') for line in stdout.decode().split('\n'))
            filenames = [line for line in stripped if line != '']
        else:
            # Neither local nor SSH: nothing can be listed.  Previously this
            # fell through and failed on the unbound ``filenames`` variable.
            self.logger.debug('_get_last_backup returns: %s', None)
            return None

        last = utils.get_last_file(filenames)
        if last is not None:
            last = os.path.join(path, last)
        self.logger.debug('_get_last_backup returns: %s', last)
        return last

    def _prepare_destination(self):
        """
        Get the destination ready to receive a backup.

        Sets ``self.current_backup_path`` (a dated sub-directory when
        ``snapshot`` is True or False, the job directory itself when it is
        None) and then creates the directory, locally or through SSH.

        :raises ValueError: if ``snapshot`` is not True, False or None
        """
        self.destination.check_availability()

        # Identity checks on purpose: only the three singletons are accepted
        dated = self.snapshot is True or self.snapshot is False
        if not dated and self.snapshot is not None:
            raise ValueError('Wrong snapshot value (True, False or None)')

        job_root = os.path.join(self.destination.path, self.name)
        if dated:
            self.current_backup_path = os.path.join(job_root, str(self.current_date))
        else:
            self.current_backup_path = job_root

        if self.destination.is_local():
            if self.snapshot is True:
                # A fresh snapshot directory: it must not exist yet
                os.makedirs(self.current_backup_path)
            elif self.snapshot is False and self.previous_backup_path is not None:
                # Move the existing copy so that its path carries the new date
                os.rename(self.previous_backup_path, self.current_backup_path)
            else:
                os.makedirs(self.current_backup_path, exist_ok=True)
        elif self.destination.is_ssh():
            # NOTE(review): unlike the local case, the previous backup is not
            # renamed here when snapshot is False - confirm this is intended.
            mkdir_cmd = ['ssh', '-t', self.destination.login, 'mkdir', '-p', self.current_backup_path]
            self.logger.debug('SSH mkdir command: ' + str(mkdir_cmd))
            remote = subprocess.Popen(mkdir_cmd, bufsize=4096, stdout=subprocess.PIPE)
            output = remote.communicate()[0]
            self.logger.debug('SSH mkdir result: ' + output.decode())

    def _prepare_rsync_command(self):
        """
        Build the rsync command line as a list of arguments.

        :returns: list of strings, starting with ``/usr/bin/rsync``
        """
        argv = [
            '/usr/bin/rsync',
            '-avh',               # archive, verbose, human readable
            '--stats',            # transfer statistics
            '--delete',           # delete extraneous files from dest dirs
            '--delete-excluded',  # also delete excluded files from dest dirs
            '-L',                 # turn symlinks into the dir/file they point to
        ]

        # Compress the stream when one end is reached through the network
        if self.source.is_ssh() or self.destination.is_ssh():
            argv.append('-z')

        if self.snapshot and self.previous_backup_path is not None:
            # link-dest is given relative to the destination: an absolute
            # path works for local targets but fails for SSH ones.
            previous_name = os.path.basename(self.previous_backup_path)
            argv.append('--link-dest=../' + previous_name)

        # Source, then destination
        argv.append(self.source.target)
        if self.destination.is_ssh():
            argv.append(str(self.destination.login) + ':' + str(self.current_backup_path))
        else:
            argv.append(self.current_backup_path)

        # Filters come last; the resulting command looks like
        # rsync -av a b --filter='- *.txt' --filter='- *dir'
        for rule in self.filter or ():
            argv.append('--filter=' + rule)
            self.logger.debug("add filter: %s", rule)

        self.logger.debug("rsync command: %s", argv)
        return argv

    def _run_command(self, command):
        """
        Run a command and log stderr+stdout in a dedicated log file.

        Output is decoded leniently: bytes that are not valid in the default
        encoding are replaced instead of raising, so unusual file names in
        the output cannot abort the job.  A non-zero exit status is reported
        on both loggers.

        :param command: Command: each element is a part of the command line
        :type command: list

        .. note::

            Example of the command format
            command = ['/usr/bin/cp', '-r', '/home', '/tmp']
        """
        #Run the command
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()

        #Dump outputs in log files
        self.job_logger.info(stdout.decode(errors='replace'))

        if stderr != b'':
            self.job_logger.info('Errors:')
            self.job_logger.info(stderr.decode(errors='replace'))

        if process.returncode != 0:
            # The exit status used to be dropped silently
            self.job_logger.info('Exit status: %s', process.returncode)
            self.logger.warning('Command %s exited with status %s', command, process.returncode)

    def run(self, uid=None, gid=None):
        """
        Run the job.

        Steps, performed only when ``_check_need_backup()`` is true or
        ``force`` is set: prepare the destination, run rsync, record the
        backup time, delete old backups, refresh the ``last`` symlink (when
        ``snapshot`` is True or False) and change the owner of the
        destination when both a uid and a gid were configured.

        A ``TARGETError`` raised along the way is logged as a warning and
        not propagated.

        :param uid: not used by this method; ownership comes from the
                    ``guid`` pair given to the constructor
        :param gid: not used by this method (see ``uid``)
        """
        self.logger.debug('Start rsync job: %s', self.name)
        #TODO rewriting and integration:
        #self._check_disk_usage()

        try:
            # None means that this is the first backup
            self.previous_backup_path = self._get_last_backup()

            self.logger.debug("Previous backup path: %s", self.previous_backup_path)
            self.logger.debug("Current backup path: %s", self.current_backup_path)

            if not (self._check_need_backup() or self.force):
                return

            self.job_logger.info('='*20 + str(self.now) + '='*20)
            self.logger.debug('Start Backup: %s', self.name)
            print(self.name)

            # Prepare the destination
            self._prepare_destination()
            self.logger.debug("source path %s", self.source.target)
            self.logger.debug("destination path %s", self.destination.target)
            self.logger.debug("filter path %s", self.filter)

            # Run rsync
            command = self._prepare_rsync_command()
            self._run_command(command)

            # Job done, update the time in the database
            self._set_lastbackup_time()

            # Remove old snapshots
            self._delete_old_files(days=self.duration, keep=self.keep)

            # Create symlink
            if self.snapshot is True or self.snapshot is False:
                self._update_last_symlink()

            # UID/GID: None is the documented "unset" value.  Compare against
            # None rather than testing truthiness, otherwise id 0 would be
            # treated as missing.
            has_uid = self.dest_uid is not None
            has_gid = self.dest_gid is not None
            if has_uid and has_gid:
                self._chown_destination(self.dest_uid, self.dest_gid)
            elif has_uid or has_gid:
                self.logger.error('uid or gid missing')

            self.logger.info("Backup %s done", self.name)
        except TARGETError as e:
            self.logger.warning(e)

    def _update_last_symlink(self):
        """
        Make ``<destination>/<name>/last`` point to the current backup.

        Only implemented for local destinations; a warning is logged for
        SSH ones.
        """
        last = os.path.join(self.destination.path, self.name, 'last')
        if self.destination.is_local():
            if os.path.islink(last):
                os.remove(last)
            try:
                # The link target is relative and is resolved against the
                # directory that holds the link, so there is no need to
                # change the working directory of the process.
                os.symlink(os.path.basename(self.current_backup_path), last)
            except FileExistsError:
                self.logger.warning('The symlink %s could not be created because a file exists', last)
            except AttributeError:
                self.logger.warning('Attribute error for symlink. Job: %s', self.name)
        elif self.destination.is_ssh():
            self.logger.warning('symlink for SSH not yet implemented')
            #TODO Create symlink

    def _chown_destination(self, uid, gid):
        """
        Give the current backup to another owner.

        Local destinations are handled with ``utils.r_chown`` on
        ``self.current_backup_path``; for SSH destinations only a warning is
        logged.

        :param uid: user ID
        :param gid: group ID
        """
        if self.destination.is_local():
            backup_path = self.current_backup_path
            self.logger.debug('chown %s %s for %s' % (uid, gid, backup_path))
            utils.r_chown(backup_path, uid, gid)
            return

        if self.destination.is_ssh():
            self.logger.warning('chown for SSH not yet implemented')
Example #2
0
 def test_is_ssh_dir_abs(self):
     """A freshly created temporary directory is not reported as SSH."""
     # The context manager removes the directory as soon as the check is
     # done instead of leaving the cleanup to garbage collection.
     with tempfile.TemporaryDirectory(suffix='', prefix='tmp', dir=None) as tmp_name:
         target = Target(tmp_name)
         self.assertFalse(target.is_ssh())
Example #3
0
 def test_is_ssh_ssh_ipv4(self):
     """A ``login:path`` style string must be reported as an SSH target."""
     # NOTE(review): the login literal looks like an obfuscated placeholder;
     # confirm the intended user@<ipv4> value against the upstream test.
     self.assertTrue(Target('[email protected]:.').is_ssh())
Example #4
0
 def test_is_ssh_ssh_cdomain(self):
     """A ``login:path`` style string must be reported as an SSH target."""
     # NOTE(review): the login literal looks like an obfuscated placeholder;
     # confirm the intended user@<domain> value against the upstream test.
     self.assertTrue(Target('[email protected]:.').is_ssh())
Example #5
0
 def test_is_ssh_ssh_domain(self):
     """A ``login:path`` style string must be reported as an SSH target."""
     # NOTE(review): the login literal looks like an obfuscated placeholder;
     # confirm the intended user@<domain> value against the upstream test.
     self.assertTrue(Target('[email protected]:.').is_ssh())
Example #6
0
 def test_is_ssh_dir_abs(self):
     """The system temporary directory is not reported as an SSH target."""
     local_target = Target(tempfile.gettempdir())
     self.assertFalse(local_target.is_ssh())