def index_local_dir(self, local_dir, exclusion_list=None):
    """Walk ``local_dir`` and build a dict of ClFile entries.

    Keys are normalized ``path + '/' + name`` strings (via
    ``common.normalize_path``); values are ``clfile.ClFile`` objects.
    Directories are stored with the sentinel size ``"-1"``; regular files
    get their real size and, when the compare method is ``'md5'``, an md5
    digest.

    :param local_dir: root directory to index
    :param exclusion_list: optional list of substrings; any path containing
        one of them is skipped
    :return: dict of normalized path -> ClFile
    """
    common.print_line('indexing local directory: ' + local_dir + '...')
    # Compile the configured exclusion regex once, outside the walk.
    if self.__exclude_regex is not None:
        regexp = re.compile(self.__exclude_regex)
    else:
        regexp = None

    def _excluded(full_path):
        # Substring-based exclusion list check.
        if exclusion_list is not None:
            for exclusion in exclusion_list:
                if exclusion in full_path:
                    logging.debug('exclusion ' + exclusion +
                                  ' applied for path ' + full_path)
                    return True
        # Regex-based exclusion check.
        if regexp is not None and regexp.search(full_path) is not None:
            logging.debug('regexp match for path: ' + full_path)
            return True
        return False

    clfiles = {}
    for root, dirs, files in os.walk(local_dir):
        for name in dirs:
            # Normalize to forward slashes so keys are consistent across OSes.
            full_path = os.path.join(root, name).replace('\\', '/')
            logging.debug('adding ' + full_path + ' to list')
            if _excluded(full_path):
                continue
            tmp_clfile = clfile.ClFile()
            tmp_clfile.is_dir = True
            tmp_clfile.path = os.path.dirname(full_path)
            tmp_clfile.name = name
            tmp_clfile.size = "-1"  # sentinel: directories carry no size
            tmp_clfile.mod_time = os.stat(full_path).st_mtime
            clfiles[common.normalize_path(
                tmp_clfile.path + '/' + tmp_clfile.name)] = tmp_clfile
        for name in files:
            # BUG FIX: normalize separators here too -- the original only
            # normalized directory paths, producing inconsistent keys on
            # Windows.
            full_path = os.path.join(root, name).replace('\\', '/')
            logging.debug('adding ' + full_path + ' to list')
            if _excluded(full_path):
                continue
            # Single stat() call instead of two (size + mtime).
            stat_result = os.stat(full_path)
            tmp_clfile = clfile.ClFile()
            tmp_clfile.is_dir = False
            tmp_clfile.path = os.path.dirname(full_path)
            tmp_clfile.name = name
            tmp_clfile.size = stat_result.st_size
            tmp_clfile.mod_time = stat_result.st_mtime
            if self._compare_method == 'md5':
                tmp_clfile.md5 = common.get_md5(full_path)
            clfiles[common.normalize_path(
                tmp_clfile.path + '/' + tmp_clfile.name)] = tmp_clfile
    logging.debug('retrieved ' + str(len(clfiles)) + ' files')
    return clfiles
def ls(self, file, with_dups=False, regex=None): logging.debug('lsjson of file: ' + file) if self._config['no_cache'] is False and self._cache is not None: logging.debug('serving cached version of file list...') self._cache_counter += 1 if self._cache_counter <= self._cache_invalidation_max: return self._cache else: self._cache_counter = 0 if not file.startswith('/'): logging.debug('adding / ' + file) file = '/' + file if regex is not None: regexp = re.compile(regex) else: regexp = None files = {} md5s = None if self._compare_method == 'md5': md5s = self.lsmd5(file) for remote in self.get_remotes(): common.print_line('retrieving file list from: ' + remote + file + '...') logging.debug('getting lsjson from ' + remote + file) try: json_out = self._rclone.lsjson(remote, file, ['--recursive', '--fast-list'], True) except exceptions.FileNotFoundException as e: json_out = '[]' logging.debug('loading json') tmp_json = json.loads(json_out) logging.debug('json size: ' + str(len(tmp_json))) logging.debug('json loaded') for tmp_json_file in tmp_json: tmp_file = clfile.ClFile() tmp_file.remote = remote tmp_file.path = file + '/' + tmp_json_file['Path'] tmp_file.name = tmp_json_file['Name'] tmp_file.size = tmp_json_file['Size'] tmp_file.mime_type = tmp_json_file['MimeType'] tmp_file.mod_time = tmp_json_file['ModTime'] tmp_file.is_dir = tmp_json_file['IsDir'] tmp_file.id = tmp_json_file['ID'] key = file + '/' + tmp_json_file['Path'] if regexp is not None and regexp.search(key) is None: logging.debug('skipping ' + key + '...') continue if self._compare_method == 'md5' and not tmp_file.is_dir: tmp_file.md5 = md5s[key] if with_dups and tmp_file.is_dir is False and key in files: key = key + ClSync.duplicate_suffix files[key] = tmp_file logging.debug('end of clsync.ls()') if self._config['no_cache'] is False and self._cache is None: self._cache = files return files
def remove_duplicates(): global __cl_sync common.print_line('removing duplicates') if __cl_sync is None: __cl_sync = clsync.ClSync(__config) if len(__args) == 1: logging.error('invalid removedups command') usage_removedups() sys.exit(-1) __cl_sync.remove_duplicates(common.remove_ending_slash(__args[1]))
def run(self): logging.info('starting daemom to backup path: ' + self.__local_dir) sleep_interval = self.__config['daemon_interval'] * 60 while True: __cl_sync = clsync.ClSync(self.__config) local_dir = common.remove_ending_slash(self.__local_dir) common.print_line('backing up ' + local_dir + '...') __cl_sync.backup(local_dir, self.__config['delete_files'], self.__config['dry_run']) logging.info('sleeping for ' + str(sleep_interval) + ' seconds...') time.sleep(sleep_interval)
def backup(): global __cl_sync if __cl_sync is None: __cl_sync = clsync.ClSync(__config) if len(__args) == 1: logging.error('invalid backup command') usage_backup() sys.exit(-1) local_dir = common.remove_ending_slash(__args[1]) common.print_line('backing up ' + local_dir + '...') __cl_sync.backup(local_dir, __config['delete_files'], __config['dry_run'])
def restore(self, remote_path, local_dir, dry_run=False): logging.debug('restoring directory ' + local_dir + ' from ' + remote_path) if not common.is_dir(local_dir): #logging.error('directory ' + local_dir + ' not found') common.print_line('destination directory ' + local_dir + ' not found!') return #raise Exception('directory ' + local_dir + ' not found') for remote in self.get_remotes(): common.print_line('restoring file ' + remote + remote_path + ' -> ' + local_dir) logging.debug('restoring file ' + remote + remote_path + ' -> ' + local_dir) if dry_run is False: self.copy_new(remote + remote_path, local_dir, True)
def restore_old(self, remote_path, local_dir): logging.debug('restoring directory ' + local_dir + ' from ' + remote_path) if not common.is_dir(local_dir): #logging.error('directory ' + local_dir + ' not found') common.print_line('destination directory ' + local_dir + ' not found!') return #raise Exception('directory ' + local_dir + ' not found') remote_clfiles = self.ls(remote_path) for remote_clfile in remote_clfiles: remote = remote_clfiles[remote_clfile].remote path = remote_clfiles[remote_clfile].path common.print_line('restoring file ' + remote + os.path.dirname(path) + ' -> ' + local_dir) logging.debug('restoring file ' + os.path.dirname(path) + ' from remote ' + remote) self.copy_new(remote + os.path.dirname(path), local_dir)
def __init__(self, config_file=None, rclone_exe="rclone", rclone_retries="1"): logging.debug('constructing RClone') if config_file is not None and not common.is_file(config_file): logging.error("configuration file " + str(config_file) + " not found. Cannot continue!") raise Exception("Configuration file " + str(config_file) + " not found") if rclone_exe is not "rclone" and not common.is_file(rclone_exe): #logging.error("rclone executable " + str(rclone_exe) + " not found. Cannot continue!") common.print_line( 'RCLONE.EXE not in PATH. Put it in PATH or modify libsprinkle.conf to point to it.' ) raise Exception("rclone executable " + str(rclone_exe) + " not found") self._config_file = config_file self._rclone_exe = rclone_exe self._rclone_retries = rclone_retries
def lsmd5(self, file): logging.debug('lsjson of file: ' + file) if not file.startswith('/'): file = '/' + file files = {} for remote in self.get_remotes(): common.print_line('retrieving file list from: ' + remote + file + '...') logging.debug('getting lsjson from ' + remote + file) try: out = self._rclone.md5sum(remote, file, ['--fast-list'], True) except exceptions.FileNotFoundException as e: out = '' #logging.debug('out: ' + str(out.split('\n'))) md5s = out.split('\n') for line in md5s: if line == '': continue md5 = line.split(' ')[0] filename = line.split(' ')[1] files[file + '/' + filename] = md5 return files
def restore(): global __cl_sync if __cl_sync is None: __cl_sync = clsync.ClSync(__config) if len(__args) < 3: logging.error('invalid remote command') usage_restore() sys.exit(-1) remote_path = __args[2] local_dir = common.remove_ending_slash(__args[1]) if __config['restore_duplicates'] is False: common.print_line( 'checking if duplicates are present before restoring...') duplicates = __cl_sync.remove_duplicates(local_dir, True) if len(duplicates) > 0: common.print_line('DUPLICATE FILES FOUND:') for duplicate in duplicates: common.print_line("\t" + duplicate) common.print_line( 'restore cannot proceed! Use remove duplicates function before continuing' ) return common.print_line('restoring ' + remote_path + ' from ' + local_dir) __cl_sync.restore(local_dir, remote_path, __config['dry_run'])
def remove_duplicates(self, path, report_only=False): files = self.ls(path, True) common.print_line('analyzing for duplications...') keys = common.sort_dict_keys(files) duplicates = [] for key in keys: if key.endswith(ClSync.duplicate_suffix): logging.debug('found duplicate file: ' + key) date1 = common.get_datetime_from_iso8601(files[key].mod_time) logging.debug(key + ' timestamp: ' + str(date1.timestamp())) key2 = key.replace(ClSync.duplicate_suffix, '') date2 = common.get_datetime_from_iso8601(files[key2].mod_time) logging.debug(key2 + ' timestamp: ' + str(date2.timestamp())) if date1.timestamp() > date2.timestamp(): logging.debug(key + ' is newer than ' + key2) file_to_remove = files[key2].remote + key2 common.print_line('found duplicate file. Removing: ' + file_to_remove + '...') duplicates.append(key2) if report_only is False: self.delete_file(key2, files[key2].remote) elif date1.timestamp() == date1.timestamp(): logging.debug(key + ' is equal to ' + key2) file_to_remove = files[key2].remote + key2 common.print_line('found duplicate file. Removing: ' + file_to_remove + '...') duplicates.append(key2) if report_only is False: self.delete_file(key2, files[key2].remote) else: logging.debug(key + ' is older than ' + key2) file_to_remove = files[key].remote + key common.print_line('found duplicate file. Removing: ' + file_to_remove + '...') duplicates.append(key) if report_only is False: self.delete_file(key, files[key].remote) logging.debug('file to remove: ' + file_to_remove) return duplicates
def lsmd5(): global __cl_sync if __cl_sync is None: __cl_sync = clsync.ClSync(__config) if len(__args) == 1: logging.error('invalid lsmd5 command') usage_lsmd5() sys.exit(-1) files = __cl_sync.lsmd5(common.remove_ending_slash(__args[1])) largest_length = 25 keys = common.sort_dict_keys(files) for tmp_file in keys: filename_length = len(tmp_file) if filename_length > largest_length: largest_length = filename_length common.print_line('NAME'.ljust(largest_length) + " " + 'MD5'.ljust(32)) common.print_line(''.join('-' for i in range(largest_length)) + " " + ''.join('-' for i in range(32))) for tmp_file in keys: file_name = tmp_file common.print_line( file_name.ljust(largest_length) + " " + files[tmp_file])
def find(): global __cl_sync if __cl_sync is None: __cl_sync = clsync.ClSync(__config) if len(__args) == 1: logging.error('invalid find command') usage_find() sys.exit(-1) files = __cl_sync.find(common.remove_ending_slash(__args[1])) largest_length = 25 keys = common.sort_dict_keys(files) for tmp_file in keys: filename_length = len(files[tmp_file].path) if not files[tmp_file].is_dir and filename_length > largest_length: largest_length = filename_length common.print_line('---' + " " + 'NAME'.ljust(largest_length) + " " + 'SIZE'.rjust(9) + " " + 'MOD TIME'.ljust(19) + " " + 'REMOTE') common.print_line('---' + " " + ''.join('-' for i in range(largest_length)) + " " + ''.join('-' for i in range(9)) + " " + ''.join('-' for i in range(19)) + " " + ''.join('-' for i in range(15))) for tmp_file in keys: if files[tmp_file].is_dir is True: first_chars = '-d-' else: first_chars = '---' file_name = files[tmp_file].path if file_name.startswith('//'): file_name = file_name[1:len(file_name)] common.print_line( first_chars + " " + file_name.ljust(largest_length) + " " + str(files[tmp_file].size).rjust(9) + " " + common.get_printable_datetime(files[tmp_file].mod_time).ljust(19) + " " + files[tmp_file].remote)
def stats(): global __cl_sync logging.debug('display stats about the volumes') common.print_line('calculating total and free space...') if __cl_sync is None: __cl_sync = clsync.ClSync(__config) common.print_line('REMOTE'.ljust(15) + " " + 'SIZE'.rjust(20) + " " + 'FREE'.rjust(20) + " " + '%FREE'.rjust(10)) common.print_line(''.join('=' for i in range(15)) + " " + ''.join('=' for i in range(20)) + " " + ''.join('=' for i in range(20)) + " " + ''.join('=' for i in range(10))) sizes = __cl_sync.get_sizes() frees = __cl_sync.get_frees() display_unit = __config['display_unit'] for remote in sizes: percent_use = frees[remote] * 100 / sizes[remote] size_d = common.convert_unit(sizes[remote], display_unit) free_d = common.convert_unit(frees[remote], display_unit) common.print_line( remote.ljust(15) + " " + "{:,}".format(size_d).rjust(19) + display_unit + " " + "{:,}".format(free_d).rjust(19) + display_unit + " " + "{:,}".format(int(percent_use)).rjust(10)) size = __cl_sync.get_size() free = __cl_sync.get_free() logging.debug('size: ' + "{:,}".format(size)) logging.debug('free: ' + "{:,}".format(free)) percent_use = free * 100 / size common.print_line(''.join('-' for i in range(15)) + " " + ''.join('-' for i in range(20)) + " " + ''.join('-' for i in range(20)) + " " + ''.join('-' for i in range(10))) size_d = common.convert_unit(size, display_unit) free_d = common.convert_unit(free, display_unit) common.print_line("total:".ljust(15) + " " + "{:,}".format(size_d).rjust(19) + display_unit + " " + "{:,}".format(free_d).rjust(19) + display_unit + " " + "{:,}".format(int(percent_use)).rjust(10))
def backup(self, local_dir, delete_files=True, dry_run=False):
    """Back up ``local_dir`` by applying the diff against the remotes.

    Indexes the local tree, lists the remotes, computes ADD/UPDATE/REMOVE
    operations via ``compare_clfiles``, then executes them (optionally with
    a progress bar, and skipping all writes when ``dry_run`` is True).

    :param local_dir: local directory to back up; must exist
    :param delete_files: when True, remotely delete files missing locally
    :param dry_run: when True, log/print actions without performing them
    :raises Exception: when ``local_dir`` does not exist
    """
    logging.debug('backing up directory ' + local_dir)
    if not common.is_dir(local_dir):
        logging.error("local directory " + local_dir +
                      " not found. Cannot continue!")
        raise Exception("Local directory " + local_dir + " not found")
    local_clfiles = self.index_local_dir(local_dir, self.__exclusion_list)
    # Remote listing is keyed off the basename of the local directory.
    remote_clfiles = self.ls(os.path.basename(local_dir))
    ops = self.compare_clfiles(local_dir, local_clfiles, remote_clfiles,
                               delete_files)
    if self._show_progress:
        bar = Bar(
            'Progress',
            max=len(ops),
            suffix=
            '%(index)d/%(max)d %(percent)d%% [%(elapsed_td)s/%(eta_td)s]')
    if dry_run is True:
        common.print_line('performing a dry run. no changes are committed')
    for op in ops:
        logging.debug('operation: ' + op.operation + ", path: " + op.src.path)
        if self._show_progress:
            # Fixed-width bar label: pad with dots, then clamp to 25 chars.
            bar_title = op.src.name.ljust(25, '.')
            if len(bar_title) > 25:
                bar_title = bar_title[0:25]
            bar.message = 'file:' + bar_title
        # Directories are only acted on for REMOVE; ADD/UPDATE skip them.
        if op.src.is_dir and op.operation != operation.Operation.REMOVE:
            logging.debug('skipping directory ' + op.src.path)
            continue
        if op.operation == operation.Operation.ADD:
            # New file: pick the remote with the most suitable free space.
            best_remote = self.get_best_remote(int(op.src.size))
            logging.debug('best remote: ' + best_remote)
            if not self._show_progress:
                common.print_line('backing up file ' + op.src.path + '/' +
                                  op.src.name + ' -> ' + best_remote + ':' +
                                  op.src.remote_path)
            if dry_run is False:
                self.copy(op.src.path + '/' + op.src.name,
                          op.src.remote_path, best_remote)
        if op.operation == operation.Operation.UPDATE:
            # NOTE(review): best_remote is computed here but the copy goes
            # to op.src.remote (the file's existing remote) -- presumably
            # intentional so updates stay on the same remote; confirm.
            best_remote = self.get_best_remote(int(op.src.size))
            logging.debug('best remote: ' + best_remote)
            if not self._show_progress:
                common.print_line('backing up file ' + op.src.path + '/' +
                                  op.src.name + ' -> ' + op.src.remote + ':' +
                                  op.src.remote_path)
            if dry_run is False:
                self.copy(op.src.path + '/' + op.src.name,
                          op.src.remote_path, op.src.remote)
        if op.operation == operation.Operation.REMOVE and delete_files is True:
            if not self._show_progress:
                common.print_line('removing ' + op.src.remote + op.src.path)
            if op.src.is_dir:
                if dry_run is False:
                    # rmdir can fail (e.g. non-empty/already gone); treated
                    # as best-effort and only logged.
                    try:
                        self.rmdir(op.src.path, op.src.remote)
                    except Exception as e:
                        logging.debug(str(e))
            else:
                if dry_run is False:
                    self.delete_file(op.src.path, op.src.remote)
        if self._show_progress:
            bar.next()
    if self._show_progress:
        bar.finish()
def compare_clfiles(self, local_dir, local_clfiles, remote_clfiles,
                    delete_file=True):
    """Diff local files against remote files and emit Operation objects.

    Local files missing remotely become ADD; files differing by size or
    md5 (per ``self._compare_method``) become UPDATE; when ``delete_file``
    is True, remote files with no local counterpart become REMOVE.

    :param local_dir: local root used to compute relative names
    :param local_clfiles: dict of path -> ClFile from index_local_dir()
    :param remote_clfiles: dict of path -> ClFile from ls()
    :param delete_file: when True, also emit REMOVE operations
    :return: list of operation.Operation
    :raises Exception: when the configured compare method is unknown
    """
    common.print_line('calculating differences...')
    logging.debug('comparing clfiles')
    logging.debug('local directory: ' + local_dir)
    logging.debug('local clfiles size: ' + str(len(local_clfiles)))
    logging.debug('remote clfiles size: ' + str(len(remote_clfiles)))
    remote_dir = os.path.dirname(local_dir)
    operations = []
    for local_path in local_clfiles:
        local_clfile = local_clfiles[local_path]
        # Directories are never synced directly -- only their files.
        if local_clfile.is_dir:
            continue
        logging.debug('checking local clfile: ' + local_path + " name: " +
                      local_clfile.name)
        # Relative name/path of the local file w.r.t. local_dir, used as
        # the lookup key into remote_clfiles.
        rel_name = common.remove_localdir(
            local_dir, local_clfile.path + '/' + local_clfile.name)
        rel_path = common.remove_localdir(local_dir, local_clfile.path)
        logging.debug('relative name: ' + rel_name)
        if rel_name not in remote_clfiles:
            # Not on any remote yet: schedule an ADD.
            logging.debug('not found in remote_clfiles')
            local_clfile.remote_path = rel_path
            op = operation.Operation(operation.Operation.ADD,
                                     local_clfile, None)
            operations.append(op)
        else:
            logging.debug('file found in remote_clfiles')
            remote_clfile = remote_clfiles[rel_name]
            if self._compare_method == 'size':
                # Size-based change detection.
                size_local = local_clfile.size
                size_remote = remote_clfile.size
                current_remote = remote_clfiles[rel_name].remote
                logging.debug('local_file.size:' + str(local_clfile.size) +
                              ', remote_clfile.size:' +
                              str(remote_clfile.size))
                if size_local != size_remote:
                    logging.debug('file has changed')
                    local_clfile.remote_path = rel_path
                    # UPDATE stays on the remote that already holds the file.
                    local_clfile.remote = current_remote
                    op = operation.Operation(operation.Operation.UPDATE,
                                             local_clfile, None)
                    operations.append(op)
            elif self._compare_method == 'md5':
                # Content-hash-based change detection.
                local_md5 = local_clfile.md5
                remote_md5 = remote_clfile.md5
                current_remote = remote_clfiles[rel_name].remote
                logging.debug('local_file.md5:' + str(local_md5) +
                              ', remote_clfile.md5:' + str(remote_md5))
                if local_md5 != remote_md5:
                    logging.debug('file has changed')
                    local_clfile.remote_path = rel_path
                    local_clfile.remote = current_remote
                    op = operation.Operation(operation.Operation.UPDATE,
                                             local_clfile, None)
                    operations.append(op)
            else:
                logging.error('compare_method: ' + self._compare_method +
                              ' not valid!')
                raise Exception('compare_method: ' + self._compare_method +
                                ' not valid!')
    if delete_file is True:
        # Reverse-sorted so files are handled before their parent dirs.
        reverse_keys = common.sort_dict_keys(remote_clfiles, True)
        for remote_path in reverse_keys:
            remote_clfile = remote_clfiles[remote_path]
            logging.debug('checking file ' + remote_dir + remote_path +
                          ' for deletion')
            rel_name = common.remove_localdir(
                local_dir, remote_clfile.path + '/' + remote_clfile.name)
            rel_path = common.remove_localdir(local_dir, remote_clfile.path)
            logging.debug('relative name: ' + rel_name)
            # Local keys are prefixed with the parent of local_dir, so the
            # remote key is rebuilt the same way before the lookup.
            if remote_dir + remote_path not in local_clfiles:
                logging.debug('file ' + remote_path + ' has been deleted')
                remote_clfile.remote_path = rel_path
                op = operation.Operation(operation.Operation.REMOVE,
                                         remote_clfile, None)
                operations.append(op)
    common.print_line('found ' + str(len(operations)) + ' differences')
    return operations