Beispiel #1
0
    def put_r(self,
              localpath,
              remote_path,
              confirm=True,
              preserve_mtime=False):
        """Recursively copy a local directory's contents to remote_path.

        The local tree is walked relative to ``localpath``; each directory
        found is created on the remote side when it is missing, and each file
        is then uploaded with :meth:`put`.

        :param str localpath: the local path to copy (source)
        :param str remote_path:
            the remote path to copy to (target)
        :param bool confirm:
            whether to do a stat() on the file afterwards to confirm the file
            size
        :param bool preserve_mtime:
            *Default: False* - make the modification time (st_mtime) on the
            remote file match the time on the local. (st_atime can differ
            because stat'ing the local file can/does update its st_atime)

        :returns: None

        :raises IOError: if remote_path doesn't exist
        :raises OSError: if localpath doesn't exist
        """
        logging_utils.log_func_details(localpath=localpath,
                                       remote_path=remote_path,
                                       confirm=confirm,
                                       preserve_mtime=preserve_mtime)
        wtcb = WTCallbacks()
        cur_local_dir = os.getcwd()
        os.chdir(localpath)
        try:
            # walk from '.' so the collected names are relative to localpath
            walktree('.', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
        finally:
            # restore the local directory even when the walk fails, so the
            # caller's working directory is never left changed
            os.chdir(cur_local_dir)

        for dname in wtcb.dlist:
            if dname != '.':
                pth = reparent(remote_path, dname)
                if not self.is_dir(pth):
                    self.mkdir(pth)

        for fname in wtcb.flist:
            head, _ = os.path.split(fname)
            if head not in wtcb.dlist:
                # the file sits in a directory the walk did not report;
                # make sure every leading component exists remotely
                for subdir in path_advance(head):
                    if subdir not in wtcb.dlist and subdir != '.':
                        pth = reparent(remote_path, subdir)
                        # same guard as above: only create what is missing
                        if not self.is_dir(pth):
                            self.mkdir(pth)
                        # assign a new list so the dlist setter is used
                        wtcb.dlist = wtcb.dlist + [
                            subdir,
                        ]
            src = os.path.join(localpath, fname)
            dest = reparent(remote_path, fname)
            self.put(src, dest, confirm=confirm, preserve_mtime=preserve_mtime)
Beispiel #2
0
    def put_d(self, localpath, remotepath, confirm=True, preserve_mtime=False):
        """Copy the files directly inside a local directory to remotepath.

        Only the top level of ``localpath`` is walked (``recurse=False``);
        each file found is uploaded with :meth:`put`.

        :param str localpath: the local path to copy (source)
        :param str remotepath:
            the remote path to copy to (target)
        :param bool confirm:
            whether to do a stat() on the file afterwards to confirm the file
            size
        :param bool preserve_mtime:
            *Default: False* - make the modification time (st_mtime) on the
            remote file match the time on the local. (st_atime can differ
            because stat'ing the local file can/does update its st_atime)

        :returns: None

        :raises IOError: if remotepath doesn't exist
        :raises OSError: if localpath doesn't exist
        """
        self._sftp_connect()
        wtcb = WTCallbacks()
        cur_local_dir = os.getcwd()
        os.chdir(localpath)
        try:
            # walk from '.' so the collected names are relative to localpath
            walktree('.', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb,
                     recurse=False)
        finally:
            # restore the local directory before uploading: the source paths
            # below are built from localpath, so a relative localpath must be
            # resolved against the caller's original working directory, and
            # a failing walk or upload must not leave the cwd changed
            os.chdir(cur_local_dir)

        for fname in wtcb.flist:
            src = os.path.join(localpath, fname)
            dest = reparent(remotepath, fname)
            self.put(src, dest, confirm=confirm, preserve_mtime=preserve_mtime)
Beispiel #3
0
    def get_r(self, remotedir, localdir, preserve_mtime=False):
        """Recursively copy the remotedir structure to localdir.

        The remote tree is walked first; the matching local directories are
        created under ``localdir`` (existing ones are reused), then every
        remote file is downloaded with :meth:`get`.

        :param str remotedir: the remote directory to copy from
        :param str localdir: the local directory to copy to
        :param bool preserve_mtime: *Default: False* -
            preserve modification time on files

        :returns: None

        :raises OSError:
            if a local directory cannot be created and does not already exist

        """
        logging_utils.log_func_details(remotedir=remotedir,
                                       localdir=localdir,
                                       preserve_mtime=preserve_mtime)
        wtcb = WTCallbacks()
        self.walktree(remotedir, wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
        # handle directories we recursed through
        for dname in wtcb.dlist:
            for subdir in path_advance(dname):
                target = reparent(localdir, subdir)
                try:
                    os.mkdir(target)
                except OSError:
                    # an already existing directory is fine; anything else
                    # (permissions, missing parent, ...) is a real failure
                    if not os.path.isdir(target):
                        raise
                else:
                    # force result to a list for setter,
                    wtcb.dlist = wtcb.dlist + [
                        subdir,
                    ]

        for fname in wtcb.flist:
            # they may have told us to start down farther, so we may not have
            # recursed through some, ensure local dir structure matches
            head, _ = os.path.split(fname)
            if head not in wtcb.dlist:
                for subdir in path_advance(head):
                    if subdir not in wtcb.dlist and subdir != '.':
                        target = reparent(localdir, subdir)
                        # reuse a directory that is already present instead
                        # of failing, e.g. when the copy is run a second time
                        if not os.path.isdir(target):
                            os.mkdir(target)
                        wtcb.dlist = wtcb.dlist + [
                            subdir,
                        ]

            self.get(fname,
                     reparent(localdir, fname),
                     preserve_mtime=preserve_mtime)
Beispiel #4
0
    def put_r(self, localpath, remotepath, confirm=True, preserve_mtime=False):
        """Recursively copy a local directory's contents to remotepath.

        The local tree is walked relative to ``localpath``; each directory
        found is created on the remote side when it is missing, and each file
        is then uploaded with :meth:`put`.

        :param str localpath: the local path to copy (source)
        :param str remotepath:
            the remote path to copy to (target)
        :param bool confirm:
            whether to do a stat() on the file afterwards to confirm the file
            size
        :param bool preserve_mtime:
            *Default: False* - make the modification time (st_mtime) on the
            remote file match the time on the local. (st_atime can differ
            because stat'ing the local file can/does update its st_atime)

        :returns: None

        :raises IOError: if remotepath doesn't exist
        :raises OSError: if localpath doesn't exist
        """
        self._sftp_connect()
        wtcb = WTCallbacks()
        cur_local_dir = os.getcwd()
        os.chdir(localpath)
        try:
            # walk from '.' so the collected names are relative to localpath
            walktree('.', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
        finally:
            # restore the local directory even when the walk fails, so the
            # caller's working directory is never left changed
            os.chdir(cur_local_dir)

        for dname in wtcb.dlist:
            if dname != '.':
                pth = reparent(remotepath, dname)
                if not self.isdir(pth):
                    self.mkdir(pth)

        for fname in wtcb.flist:
            head, _ = os.path.split(fname)
            if head not in wtcb.dlist:
                # the file sits in a directory the walk did not report;
                # make sure every leading component exists remotely
                for subdir in path_advance(head):
                    if subdir not in wtcb.dlist and subdir != '.':
                        pth = reparent(remotepath, subdir)
                        # same guard as above: only create what is missing
                        if not self.isdir(pth):
                            self.mkdir(pth)
                        # assign a new list so the dlist setter is used
                        wtcb.dlist = wtcb.dlist + [subdir, ]
            src = os.path.join(localpath, fname)
            dest = reparent(remotepath, fname)
            self.put(src, dest, confirm=confirm, preserve_mtime=preserve_mtime)
Beispiel #5
0
    def get_r(self, remotedir, localdir, preserve_mtime=False):
        """Recursively copy the remotedir structure to localdir.

        The remote tree is walked first; the matching local directories are
        created under ``localdir`` (existing ones are reused), then every
        remote file is downloaded with :meth:`get`.

        :param str remotedir: the remote directory to copy from
        :param str localdir: the local directory to copy to
        :param bool preserve_mtime: *Default: False* -
            preserve modification time on files

        :returns: None

        :raises OSError:
            if a local directory cannot be created and does not already exist

        """
        self._sftp_connect()
        wtcb = WTCallbacks()
        self.walktree(remotedir, wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)
        # handle directories we recursed through
        for dname in wtcb.dlist:
            for subdir in path_advance(dname):
                target = reparent(localdir, subdir)
                try:
                    os.mkdir(target)
                except OSError:
                    # an already existing directory is fine; anything else
                    # (permissions, missing parent, ...) is a real failure
                    if not os.path.isdir(target):
                        raise
                else:
                    # force result to a list for setter,
                    wtcb.dlist = wtcb.dlist + [subdir, ]

        for fname in wtcb.flist:
            # they may have told us to start down farther, so we may not have
            # recursed through some, ensure local dir structure matches
            head, _ = os.path.split(fname)
            if head not in wtcb.dlist:
                for subdir in path_advance(head):
                    if subdir not in wtcb.dlist and subdir != '.':
                        target = reparent(localdir, subdir)
                        # reuse a directory that is already present instead
                        # of failing, e.g. when the copy is run a second time
                        if not os.path.isdir(target):
                            os.mkdir(target)
                        wtcb.dlist = wtcb.dlist + [subdir, ]

            self.get(fname,
                     reparent(localdir, fname),
                     preserve_mtime=preserve_mtime)
Beispiel #6
0
def main(config_file, logging_config):
    """Copy files from an SFTP server into an S3 bucket.

    Reads ``config['s3']`` (``bucket``, ``key_prefix``) and ``config['sftp']``
    (``hostname``, ``username``, ``password``).  When the optional
    ``config['incremental_sync']`` section is present, the modification time
    stored in the S3 object named by ``last_modified_s3_key`` is used as the
    starting point: only files whose mtime is at or after it are considered,
    and files whose mtime equals it are re-uploaded only if the S3 copy is
    missing or its hash differs.  The newest mtime seen is written back to
    that S3 object at the end.

    :param config_file: passed to ``get_config`` to load the configuration
    :param logging_config: passed to ``get_logger`` to build the logger

    :returns: None

    Exits the process with status 1 when the stored last-modified object
    cannot be fetched for a reason other than ``NoSuchKey``.
    """
    global logger

    logger = get_logger(logging_config)
    config = get_config(config_file)

    logger.info('Starting sync')

    num_files_synced = 0
    num_bytes_synced = 0

    start_time = None
    last_modified = None

    s3 = boto3.client('s3')
    bucket = config['s3']['bucket']
    key_prefix = config['s3']['key_prefix']
    incremental = 'incremental_sync' in config

    if incremental:
        key = config['incremental_sync']['last_modified_s3_key']
        try:
            response = s3.get_object(Bucket=bucket, Key=key)
            start_time = response['Body'].read().decode('utf-8')
            last_modified = start_time
            logger.info(
                'Using incremental sync with start_time of {} from {}/{}'.
                format(start_time, bucket, key))
        except botocore.exceptions.ClientError as e:
            # a missing marker object just means "first run": sync everything
            if e.response['Error']['Code'] != 'NoSuchKey':
                logger.exception(
                    'Could not fetch last modified time S3 object - {}/{}'.
                    format(bucket, key))
                sys.exit(1)

    cnopts = pysftp.CnOpts()
    cnopts.compression = True
    # NOTE(review): hostkeys = None appears to turn off SFTP host key
    # verification - confirm this is acceptable for the deployment.
    cnopts.hostkeys = None

    with pysftp.Connection(config['sftp']['hostname'],
                           username=config['sftp']['username'],
                           password=config['sftp']['password'],
                           cnopts=cnopts) as sftp:

        logger.info('Walking SFTP server structure')
        wtcb = WTCallbacks()
        sftp.walktree('/', wtcb.file_cb, wtcb.dir_cb, wtcb.unk_cb)

        for fname in wtcb.flist:
            stats = sftp.sftp_client.stat(fname)

            # NOTE(review): mtimes are kept and compared as strings, so the
            # ordering is lexicographic; this matches numeric order only
            # while both values have the same number of digits.
            mtime = str(stats.st_mtime)
            size = stats.st_size

            if start_time is None or mtime >= start_time:
                with sftp.sftp_client.file(fname) as remote_file:
                    # anything strictly newer than start_time (or everything,
                    # on a full sync) is uploaded unconditionally
                    needs_sync = True

                    if mtime == start_time:
                        s3_hash = s3_md5(s3, bucket, key_prefix + fname)

                        # if s3 object doesn't exist, don't bother hashing
                        # the sftp file - it has to be uploaded anyway
                        if s3_hash is not None:
                            logger.info(
                                '{} modified time equals start_time, hash checking file'
                                .format(fname))
                            needs_sync = s3_hash != file_md5(remote_file)
                            # hashing may have consumed the stream; rewind so
                            # a following upload sends the whole file
                            remote_file.seek(0)

                    if needs_sync:
                        logger.info('Syncing {} - {} mtime - {} bytes'.format(
                            fname, mtime, size))

                        s3.put_object(
                            Bucket=bucket,
                            Key=key_prefix + fname,
                            Body=remote_file,
                            Metadata={
                                'sftp_mtime':
                                mtime,
                                'sftp_sync_time':
                                datetime.datetime.utcnow().isoformat()
                            })

                        num_files_synced += 1
                        num_bytes_synced += size

            # track the newest mtime seen for the next incremental run
            if incremental and (last_modified is None
                                or mtime >= last_modified):
                last_modified = mtime

        if (incremental and last_modified is not None
                and last_modified != start_time):
            logger.info('Updating last_modified time {}'.format(last_modified))
            s3.put_object(
                Bucket=bucket,
                Key=config['incremental_sync']['last_modified_s3_key'],
                Body=str(last_modified).encode('utf8'))

        logger.info('Synced {} files and {} bytes'.format(
            num_files_synced, num_bytes_synced))