def put_r(self, localpath, remote_path, confirm=True, preserve_mtime=False):
    """Recursively copies a local directory's contents to a remote_path

    :param str localpath: the local path to copy (source)
    :param str remote_path: the remote path to copy to (target)
    :param bool confirm:
        whether to do a stat() on the file afterwards to confirm the file size
    :param bool preserve_mtime: *Default: False* -
        make the modification time(st_mtime) on the remote file match the
        time on the local. (st_atime can differ because stat'ing the
        localfile can/does update its st_atime)

    :returns: None

    :raises IOError: if remote_path doesn't exist
    :raises OSError: if localpath doesn't exist

    """
    logging_utils.log_func_details(localpath=localpath,
                                   remote_path=remote_path,
                                   confirm=confirm,
                                   preserve_mtime=preserve_mtime)
    wtcb = WTCallbacks()
    cur_local_dir = os.getcwd()
    os.chdir(localpath)
    walktree('.', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
    # restore local directory
    os.chdir(cur_local_dir)

    # create any remote directories found in the walk before uploading files
    for dname in wtcb.dlist:
        if dname != '.':
            pth = reparent(remote_path, dname)
            if not self.is_dir(pth):
                self.mkdir(pth)

    for fname in wtcb.flist:
        head, _ = os.path.split(fname)
        if head not in wtcb.dlist:
            for subdir in path_advance(head):
                if subdir not in wtcb.dlist and subdir != '.':
                    self.mkdir(reparent(remote_path, subdir))
                    wtcb.dlist = wtcb.dlist + [subdir, ]
        src = os.path.join(localpath, fname)
        dest = reparent(remote_path, fname)
        # print('put', src, dest)
        self.put(src, dest, confirm=confirm, preserve_mtime=preserve_mtime)
def put_d(self, localpath, remotepath, confirm=True, preserve_mtime=False):
    """Copies a local directory's contents to a remotepath

    :param str localpath: the local path to copy (source)
    :param str remotepath: the remote path to copy to (target)
    :param bool confirm:
        whether to do a stat() on the file afterwards to confirm the file size
    :param bool preserve_mtime: *Default: False* -
        make the modification time(st_mtime) on the remote file match the
        time on the local. (st_atime can differ because stat'ing the
        localfile can/does update its st_atime)

    :returns: None

    :raises IOError: if remotepath doesn't exist
    :raises OSError: if localpath doesn't exist

    """
    self._sftp_connect()
    wtcb = WTCallbacks()
    cur_local_dir = os.getcwd()
    os.chdir(localpath)
    # non-recursive walk: only files directly inside localpath are copied
    walktree('.', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb, recurse=False)
    for fname in wtcb.flist:
        src = os.path.join(localpath, fname)
        dest = reparent(remotepath, fname)
        # print('put', src, dest)
        self.put(src, dest, confirm=confirm, preserve_mtime=preserve_mtime)

    # restore local directory
    os.chdir(cur_local_dir)
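A minimal usage sketch for the two upload helpers above, assuming they are exposed on a pysftp Connection as in the stock library; the hostname, credentials, and paths are placeholders, not values taken from this code.

import pysftp

cnopts = pysftp.CnOpts()
cnopts.hostkeys = None  # sketch only; load a known_hosts file in real use

# Placeholder connection details.
with pysftp.Connection('sftp.example.com', username='user',
                       password='secret', cnopts=cnopts) as sftp:
    if not sftp.isdir('/upload'):
        sftp.mkdir('/upload')
    # put_r mirrors the whole local tree, subdirectories included
    sftp.put_r('/tmp/outgoing', '/upload', preserve_mtime=True)
    # put_d copies only the files sitting directly in the directory
    sftp.put_d('/tmp/outgoing', '/upload')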
def get_r(self, remotedir, localdir, preserve_mtime=False):
    """recursively copy remotedir structure to localdir

    :param str remotedir: the remote directory to copy from
    :param str localdir: the local directory to copy to
    :param bool preserve_mtime: *Default: False* -
        preserve modification time on files

    :returns: None

    :raises:

    """
    logging_utils.log_func_details(remotedir=remotedir,
                                   localdir=localdir,
                                   preserve_mtime=preserve_mtime)
    wtcb = WTCallbacks()
    self.walktree(remotedir, wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
    # handle directories we recursed through
    for dname in wtcb.dlist:
        for subdir in path_advance(dname):
            try:
                os.mkdir(reparent(localdir, subdir))
                # force result to a list for setter,
                wtcb.dlist = wtcb.dlist + [subdir, ]
            except OSError:     # dir exists
                pass

    for fname in wtcb.flist:
        # they may have told us to start down farther, so we may not have
        # recursed through some, ensure local dir structure matches
        head, _ = os.path.split(fname)
        if head not in wtcb.dlist:
            for subdir in path_advance(head):
                if subdir not in wtcb.dlist and subdir != '.':
                    os.mkdir(reparent(localdir, subdir))
                    wtcb.dlist = wtcb.dlist + [subdir, ]

        self.get(fname, reparent(localdir, fname),
                 preserve_mtime=preserve_mtime)
def put_r(self, localpath, remotepath, confirm=True, preserve_mtime=False):
    """Recursively copies a local directory's contents to a remotepath

    :param str localpath: the local path to copy (source)
    :param str remotepath: the remote path to copy to (target)
    :param bool confirm:
        whether to do a stat() on the file afterwards to confirm the file size
    :param bool preserve_mtime: *Default: False* -
        make the modification time(st_mtime) on the remote file match the
        time on the local. (st_atime can differ because stat'ing the
        localfile can/does update its st_atime)

    :returns: None

    :raises IOError: if remotepath doesn't exist
    :raises OSError: if localpath doesn't exist

    """
    self._sftp_connect()
    wtcb = WTCallbacks()
    cur_local_dir = os.getcwd()
    os.chdir(localpath)
    walktree('.', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
    # restore local directory
    os.chdir(cur_local_dir)

    # create any remote directories found in the walk before uploading files
    for dname in wtcb.dlist:
        if dname != '.':
            pth = reparent(remotepath, dname)
            if not self.isdir(pth):
                self.mkdir(pth)

    for fname in wtcb.flist:
        head, _ = os.path.split(fname)
        if head not in wtcb.dlist:
            for subdir in path_advance(head):
                if subdir not in wtcb.dlist and subdir != '.':
                    self.mkdir(reparent(remotepath, subdir))
                    wtcb.dlist = wtcb.dlist + [subdir, ]
        src = os.path.join(localpath, fname)
        dest = reparent(remotepath, fname)
        # print('put', src, dest)
        self.put(src, dest, confirm=confirm, preserve_mtime=preserve_mtime)
def get_r(self, remotedir, localdir, preserve_mtime=False):
    """recursively copy remotedir structure to localdir

    :param str remotedir: the remote directory to copy from
    :param str localdir: the local directory to copy to
    :param bool preserve_mtime: *Default: False* -
        preserve modification time on files

    :returns: None

    :raises:

    """
    self._sftp_connect()
    wtcb = WTCallbacks()
    self.walktree(remotedir, wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
    # handle directories we recursed through
    for dname in wtcb.dlist:
        for subdir in path_advance(dname):
            try:
                os.mkdir(reparent(localdir, subdir))
                # force result to a list for setter,
                wtcb.dlist = wtcb.dlist + [subdir, ]
            except OSError:     # dir exists
                pass

    for fname in wtcb.flist:
        # they may have told us to start down farther, so we may not have
        # recursed through some, ensure local dir structure matches
        head, _ = os.path.split(fname)
        if head not in wtcb.dlist:
            for subdir in path_advance(head):
                if subdir not in wtcb.dlist and subdir != '.':
                    os.mkdir(reparent(localdir, subdir))
                    wtcb.dlist = wtcb.dlist + [subdir, ]

        self.get(fname, reparent(localdir, fname),
                 preserve_mtime=preserve_mtime)
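A matching download sketch, again with placeholder host, credentials, and paths: get_r walks the remote tree and recreates it under the local directory, optionally carrying over modification times.

import os
import pysftp

cnopts = pysftp.CnOpts()
cnopts.hostkeys = None  # sketch only; verify host keys in real use

os.makedirs('./incoming', exist_ok=True)  # local target must exist

with pysftp.Connection('sftp.example.com', username='user',
                       password='secret', cnopts=cnopts) as sftp:
    # recreate the remote /data tree under ./incoming, keeping mtimes
    sftp.get_r('/data', './incoming', preserve_mtime=True)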
def main(config_file, logging_config):
    global logger
    logger = get_logger(logging_config)
    config = get_config(config_file)
    logger.info('Starting sync')
    num_files_synced = 0
    num_bytes_synced = 0
    start_time = None
    last_modified = None
    s3 = boto3.client('s3')
    bucket = config['s3']['bucket']
    key_prefix = config['s3']['key_prefix']

    if 'incremental_sync' in config:
        key = config['incremental_sync']['last_modified_s3_key']
        try:
            response = s3.get_object(Bucket=bucket, Key=key)
            start_time = response['Body'].read().decode('utf-8')
            last_modified = start_time
            logger.info(
                'Using incremental sync with start_time of {} from {}/{}'.format(
                    start_time, bucket, key))
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] != 'NoSuchKey':
                logger.exception(
                    'Could not fetch last modified time S3 object - {}/{}'.format(
                        bucket, key))
                sys.exit(1)

    cnopts = pysftp.CnOpts()
    cnopts.compression = True
    cnopts.hostkeys = None

    with pysftp.Connection(config['sftp']['hostname'],
                           username=config['sftp']['username'],
                           password=config['sftp']['password'],
                           cnopts=cnopts) as sftp:
        logger.info('Walking SFTP server structure')
        wtcb = WTCallbacks()
        sftp.walktree('/', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)

        for fname in wtcb.flist:
            stats = sftp.sftp_client.stat(fname)
            mtime = str(stats.st_mtime)
            size = stats.st_size
            if start_time == None or mtime >= start_time:
                with sftp.sftp_client.file(fname) as file:
                    if mtime == start_time:
                        s3_hash = s3_md5(s3, bucket, key_prefix + fname)
                        # if s3 object doesn't exist, don't bother hashing sftp file
                        if s3_hash != None:
                            logger.info(
                                '{} modified time equals start_time, hash checking file'.format(fname))
                            file_hash = file_md5(file)
                        else:
                            file_hash = None
                    if start_time == None or mtime > start_time or s3_hash != file_hash:
                        logger.info('Syncing {} - {} mtime - {} bytes'.format(
                            fname, mtime, size))
                        s3.put_object(
                            Bucket=bucket,
                            Key=key_prefix + fname,
                            Body=file,
                            Metadata={
                                'sftp_mtime': mtime,
                                'sftp_sync_time': datetime.datetime.utcnow().isoformat()
                            })
                        num_files_synced += 1
                        num_bytes_synced += size
                        if 'incremental_sync' in config and (
                                last_modified == None or mtime >= last_modified):
                            last_modified = mtime

    if ('incremental_sync' in config and last_modified != None
            and last_modified != start_time):
        logger.info('Updating last_modified time {}'.format(last_modified))
        s3.put_object(
            Bucket=bucket,
            Key=config['incremental_sync']['last_modified_s3_key'],
            Body=str(last_modified).encode('utf8'))

    logger.info('Synced {} files and {} bytes'.format(
        num_files_synced, num_bytes_synced))
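For reference, the shape of the mapping that get_config(config_file) has to return for main() above, collected from every key the function reads; the on-disk format behind get_config is not shown here, and all values are placeholders. The incremental_sync section is optional: when present, the newest st_mtime seen is written back to the given S3 key so the next run only re-syncs newer files.

# Placeholder values only; 'incremental_sync' may be omitted to force a
# full sync on every run.
EXAMPLE_CONFIG = {
    'sftp': {
        'hostname': 'sftp.example.com',
        'username': 'user',
        'password': 'secret',
    },
    's3': {
        'bucket': 'my-sync-bucket',
        'key_prefix': 'sftp-mirror',  # prepended to every remote file path
    },
    'incremental_sync': {             # optional
        'last_modified_s3_key': 'state/last_modified',
    },
}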