def list_all(self, curr_path, remote_objs):
    '''
    Recursively record the mtime of every entry below a remote directory.

    curr_path is the remote directory to list. remote_objs is a dict that
    is filled in place: for each entry the key is the entry's path as
    returned by relpth.relpath(self.remote_dir, <entry path>) and passed
    through self.sanitize_remote_path, and the value is the entry's
    st_mtime as reported by the sftp listing. Directories get an entry of
    their own and are then descended into.

    An IOError raised while listing is logged and re-raised unchanged.
    '''
    try:
        for entry in self.sftp.listdir_attr(curr_path):
            rempath = self.sanitize_remote_path(
                os.path.join(curr_path, entry.filename))
            relpath = self.sanitize_remote_path(
                relpth.relpath(self.remote_dir, rempath))
            # Files and directories are recorded the same way; only
            # directories need the extra recursive listing.
            is_directory = self.isdir(rempath)
            remote_objs[relpath] = entry.st_mtime
            if is_directory:
                self.list_all(rempath, remote_objs)
    except IOError:
        log.info('Cannot read %s ' % curr_path)
        # Bare raise keeps the original traceback intact.
        raise
def build_update_selected(self, direction=UP, selected_files=None):
    '''
    Build the updates for syncing, looking at only the state of selected
    files, syncing in the selected direction.

    Each file path should be a unix style path, relative to the base
    (self.local_dir or self.remote_dir). It can be appended to
    self.local_dir or self.remote_dir to get the proper full path.

    e.g. if self.local_dir is /var/scm/git/repo and the full path to the
    selected file is /var/scm/git/repo/path/to/file, the file path passed
    in should be path/to/file.

    If a folder is being deleted, all sub files and folders must also be
    passed in selected_files.

    direction may only be UP or DOWN. Syncing in both directions is not
    supported by this method; any other value raises ValueError before
    any listing is done.

    selected_files defaults to no files (None is treated as empty).

    Returns a tuple (update, local_objs, remote_objs). update is a dict
    with the keys 'delete_local', 'delete_remote', 'update_local' and
    'update_remote', each holding a sorted list of relative paths (the
    delete lists are in reverse order). local_objs and remote_objs map
    relative paths to mtimes, with ignored entries removed.
    '''
    # Fail fast: reject a bad direction before paying for the full remote
    # and local listings below.
    if direction not in (UP, DOWN):
        raise ValueError(
            'Invalid sync direction [%s] for syncing selected files'
            % direction)
    if selected_files is None:
        selected_files = []

    update = {
        'delete_local': [],
        'delete_remote': [],
        'update_local': [],
        'update_remote': [],
    }

    # Anything whose relative path starts with an ignored name is dropped.
    # This also removes the children of an ignored directory, so ignoring
    # .git covers .git/objects without listing every subdirectory.
    #
    # TODO
    # CURRENTLY this will not allow syncing of .gitignore and similar
    # things as they start with .git. figure out if these things should
    # be rsyncable
    ignored = tuple(self.ignored_files)

    # List remote files and dirs. We care about this to find out whether
    # the selected files exist remotely or not.
    # PERFORMANCE NOTE - we're loading all remote files to check against
    # potentially a single selected file. this could be smarter
    log.debug('*** Listing remote dirs and files...')
    remote_objs = {}
    self.list_all(self.remote_dir, remote_objs)
    # Iterate over a snapshot of the keys so deleting is safe.
    for key in list(remote_objs):
        if key.startswith(ignored):
            del remote_objs[key]

    # List local files and dirs. We care about this to find out whether
    # the selected files exist locally or not.
    # PERFORMANCE NOTE - we're loading all local files to check against
    # potentially a single selected file. this could be smarter
    local_objs = {}
    for root, dirs, files in os.walk(self.local_dir):
        for name in files + dirs:
            path = os.path.join(root, name)
            local_objs[relpth.relpath(self.local_dir, path)] = \
                os.path.getmtime(path)
    for key in list(local_objs):
        if key.startswith(ignored):
            del local_objs[key]

    # The state of the source side is forced onto the target side,
    # regardless of which copy is newer, so the old synced state is not
    # consulted here.
    if direction == UP:
        source_objs, target_objs = local_objs, remote_objs
        delete_key, update_key = 'delete_remote', 'update_remote'
    else:
        source_objs, target_objs = remote_objs, local_objs
        delete_key, update_key = 'delete_local', 'update_local'

    for relpath in selected_files:
        if relpath in target_objs and relpath not in source_objs:
            # Exists only on the target side: it was removed at the
            # source, so remove it on the target as well.
            update[delete_key].append(relpath)
        else:
            # Exists on the source side (with or without a copy on the
            # target): copy the source state over. A path found on
            # neither side also ends up here.
            update[update_key].append(relpath)

    # Deletes are sorted in reverse so that files get deleted before the
    # directories that contain them.
    update['delete_local'].sort(reverse=True)
    update['delete_remote'].sort(reverse=True)
    update['update_local'].sort()
    update['update_remote'].sort()
    return (update, local_objs, remote_objs)
def build_update_all(self, direction=BOTH):
    '''
    Build the updates for syncing, looking at all files for changes in
    the desired directions.

    If direction is DOWN or BOTH, changes on the remote server will be
    synced locally. If direction is UP or BOTH, local changes will be
    synced to the remote server. Currently the change with the latest
    mtime is the sync winner in case of conflict.

    Returns a tuple (update, local_objs, remote_objs). update is a dict
    with the keys 'delete_local', 'delete_remote', 'update_local' and
    'update_remote', each holding a sorted list of relative paths (the
    delete lists are in reverse order). local_objs and remote_objs map
    relative paths to mtimes, with ignored entries removed.
    '''
    push = direction in (UP, BOTH)    # local changes go to the server
    pull = direction in (DOWN, BOTH)  # server changes come down locally

    update = {
        'delete_local': [],
        'delete_remote': [],
        'update_local': [],
        'update_remote': [],
    }

    # Load remote last synced file
    ## If syncing remote to local (or both ways), you want old_remote_objs
    ## so you can determine what has been deleted remotely that needs to
    ## be deleted locally
    log.debug('*** Loading remote last synced dirs and files...')
    old_remote_objs = {}
    rempath = self.sanitize_remote_path(
        os.path.join(self.remote_dir, '.khtsync'))
    if self.exists(rempath):
        # SECURITY NOTE: pickle.load can execute arbitrary code, so this
        # trusts whoever is able to write .khtsync on the remote side.
        try:
            with self.sftp.file(rempath, 'rb') as fh:
                loaded = pickle.load(fh)
        except Exception:
            # Best effort: an unreadable state file means "no previous
            # sync state", not a failed sync.
            log.info('Cannot load %s ' % rempath)
        else:
            # Anything that is not a dict is discarded.
            if isinstance(loaded, dict):
                old_remote_objs = loaded

    # Load local last synced file
    ## If syncing local to remote (or both ways), you want old_local_objs
    ## so you can determine what has been deleted locally that needs to
    ## be deleted remotely
    log.debug('*** Loading local last synced dirs and files....')
    old_local_objs = {}
    locpath = os.path.join(self.local_dir, '.khtsync')
    if os.path.exists(locpath):
        try:
            with open(locpath, 'rb') as fh:
                loaded = pickle.load(fh)
        except Exception:
            log.info('Cannot load %s ' % locpath)
        else:
            if isinstance(loaded, dict):
                old_local_objs = loaded

    # Anything whose relative path starts with an ignored name is dropped.
    # This also removes the children of an ignored directory, so ignoring
    # .git covers .git/objects without listing every subdirectory.
    #
    # TODO
    # CURRENTLY this will not allow syncing of .gitignore and similar
    # things as they start with .git. figure out how to remedy this.
    # possibly just specify '.git/' as the ignored file? that might not
    # get .git though
    ignored = tuple(self.ignored_files)

    # List remote files and dirs
    log.debug('*** Listing remote dirs and files...')
    remote_objs = {}
    self.list_all(self.remote_dir, remote_objs)
    # Iterate over a snapshot of the keys so deleting is safe.
    for key in list(remote_objs):
        if key.startswith(ignored):
            del remote_objs[key]

    # List local files and dirs
    local_objs = {}
    for root, dirs, files in os.walk(self.local_dir):
        for name in files:
            path = os.path.join(root, name)
            local_objs[relpth.relpath(self.local_dir, path)] = \
                os.path.getmtime(path)
        for name in dirs:
            path = os.path.join(root, name)
            log.debug('path: %s' % path)
            local_objs[relpth.relpath(self.local_dir, path)] = \
                os.path.getmtime(path)
    for key in list(local_objs):
        if key.startswith(ignored):
            del local_objs[key]

    log.debug('*** listing deleted files and dirs...')
    # Deleted local objs
    ## Only do this if syncing local to remote
    if push:
        for relpath in set(old_local_objs) - set(local_objs):
            # Known at the last sync but gone locally now. Only delete
            # the remote copy if it is not newer than the last synced
            # local state.
            if (relpath in remote_objs
                    and old_local_objs[relpath] >= remote_objs[relpath]):
                update['delete_remote'].append(relpath)

    # Deleted remote objs
    ## Only do this if syncing remote to local
    if pull:
        for relpath in set(old_remote_objs) - set(remote_objs):
            if (relpath in local_objs
                    and old_remote_objs[relpath] >= local_objs[relpath]):
                update['delete_local'].append(relpath)

    # New local files
    ## Only do this if syncing local to remote
    if push:
        log.debug('*** listing new local files...')
        update['update_remote'].extend(set(local_objs) - set(remote_objs))

    # New remote files
    ## Only do this if syncing remote to local
    if pull:
        log.debug('*** listing new remote files...')
        update['update_local'].extend(set(remote_objs) - set(local_objs))

    # Check modified files. The side whose mtime is larger by more than 1
    # wins; smaller differences are treated as unchanged.
    log.debug('*** listing modified files...')
    for relpath in set(remote_objs).intersection(local_objs):
        local_mtime = local_objs[relpath]
        remote_mtime = remote_objs[relpath]
        if (local_mtime - remote_mtime) > 1:
            ## only do this if syncing local to remote
            if push:
                log.debug('*** Modified local file : %s : %s > %s'
                          % (relpath, local_mtime, remote_mtime))
                update['update_remote'].append(relpath)
        elif (remote_mtime - local_mtime) > 1:
            ## only do this if syncing remote to local
            if pull:
                log.debug('*** Modified remote file : %s : %s < %s'
                          % (relpath, local_mtime, remote_mtime))
                update['update_local'].append(relpath)

    # Deletes are sorted in reverse so that files get deleted before the
    # directories that contain them.
    update['delete_local'].sort(reverse=True)
    update['delete_remote'].sort(reverse=True)
    update['update_local'].sort()
    update['update_remote'].sort()
    return (update, local_objs, remote_objs)