Beispiel #1
0
Datei: s3.py Projekt: ying-w/stor
    def upload(self,
               source,
               condition=None,
               use_manifest=False,
               headers=None,
               **kwargs):
        """Uploads a list of files and directories to s3.

        Note that the S3Path is treated as a directory.

        Note that for user-provided OBSUploadObjects, an empty directory's destination
        must have a trailing slash.

        Args:
            source (List[str|OBSUploadObject]): A list of source files, directories, and
                OBSUploadObjects to upload to S3.
            condition (function(results) -> bool): The method will only return
                when the results of upload matches the condition. The condition
                is checked against the ``'dest'`` value of every completed result.
            use_manifest (bool): Generate a data manifest and validate the upload results
                are in the manifest. When True, ``source`` must contain exactly
                one entry and that entry must be an existing directory.
            headers (dict): A dictionary of object headers to apply to the object.
                Headers will not be applied to OBSUploadObjects and any headers
                specified by an OBSUploadObject will override these headers.
                Headers should be specified as key-value pairs,
                e.g. {'ContentLanguage': 'en'}
            **kwargs: Accepted for interface compatibility; not used by this method.

        Returns:
            dict: A dictionary with the keys ``'completed'`` and ``'failed'``, each
            holding a list of the result objects returned by ``_upload_object``.
            Because a non-empty ``'failed'`` list raises, ``'failed'`` is always
            empty when this method returns normally.

        Raises:
            ValueError: If ``use_manifest`` is True and ``source`` is not a
                single directory.
            exceptions.FailedUploadError: If any upload result reports
                ``result['success']`` as falsy. The ``uploaded`` dictionary is
                passed as the second exception argument.

        Notes:

        - This method uploads to paths relative to the current
          directory.
        """
        # Manifest mode only supports a single local directory as the source.
        if use_manifest and not (len(source) == 1
                                 and os.path.isdir(source[0])):
            raise ValueError(
                'can only upload one directory with use_manifest=True')
        utils.validate_condition(condition)

        # Plain names are expanded through utils.walk_files_and_dirs;
        # OBSUploadObjects already carry their destination and are used as-is.
        files_to_convert = utils.walk_files_and_dirs(
            [name for name in source if not isinstance(name, OBSUploadObject)])
        files_to_upload = [
            obj for obj in source if isinstance(obj, OBSUploadObject)
        ]

        # The manifest file lives inside the source directory. It is excluded
        # from the regular upload list below because it is uploaded separately.
        manifest_file_name = (Path(source[0]) / utils.DATA_MANIFEST_FILE_NAME
                              if use_manifest else None)
        # Fall back to an empty path when this path has no resource part.
        resource_base = self.resource or Path('')
        # Wrap every expanded name in an OBSUploadObject. Directories get a
        # trailing slash on their object name; `headers` is only attached when
        # it is truthy.
        files_to_upload.extend([
            OBSUploadObject(
                name,
                resource_base /
                (utils.with_trailing_slash(
                    utils.file_name_to_object_name(name)) if
                 Path(name).isdir() else utils.file_name_to_object_name(name)),
                options={'headers': headers} if headers else None)
            for name in files_to_convert if name != manifest_file_name
        ])

        if use_manifest:
            # Generate the data manifest and save it remotely
            object_names = [o.object_name for o in files_to_upload]
            utils.generate_and_save_data_manifest(source[0], object_names)
            manifest_obj_name = resource_base / utils.file_name_to_object_name(
                manifest_file_name)
            manifest_obj = OBSUploadObject(
                str(manifest_file_name),
                manifest_obj_name,
                options={'headers': headers} if headers else None)
            # NOTE(review): the manifest is uploaded synchronously, before the
            # pool starts, and its result is not inspected here.
            self._upload_object(manifest_obj)

            # Make a condition for validating the upload
            manifest_cond = partial(utils.validate_manifest_list, object_names)
            condition = (utils.join_conditions(condition, manifest_cond)
                         if condition else manifest_cond)

        # The same segment size is used as both the multipart threshold and
        # the multipart chunk size.
        # NOTE(review): the key names look like boto3 TransferConfig
        # parameters - confirm against _upload_object.
        options = settings.get()['s3:upload']
        segment_size = utils.str_to_bytes(options.get('segment_size'))
        transfer_config = {
            'multipart_threshold': segment_size,
            'max_concurrency': options.get('segment_threads'),
            'multipart_chunksize': segment_size
        }
        upload_w_config = partial(self._upload_object, config=transfer_config)

        uploaded = {'completed': [], 'failed': []}
        with S3UploadLogger(len(files_to_upload)) as ul:
            pool = ThreadPool(options['object_threads'])
            try:
                result_iter = pool.imap_unordered(upload_w_config,
                                                  files_to_upload)
                # Drain results in completion order until the iterator is
                # exhausted.
                # NOTE(review): next() is called with an explicit timeout,
                # presumably so the main thread stays interruptible while
                # waiting - confirm.
                while True:
                    try:
                        result = result_iter.next(0xFFFF)
                        if result['success']:
                            # Only successful results are reported to the logger.
                            ul.add_result(result)
                            uploaded['completed'].append(result)
                        else:
                            uploaded['failed'].append(result)
                    except StopIteration:
                        break
                pool.close()
            except BaseException:
                # Stop the workers on any error (BaseException also covers
                # KeyboardInterrupt), then re-raise to the caller.
                pool.terminate()
                raise
            finally:
                # Always wait for the worker threads to finish.
                pool.join()

        if uploaded['failed']:
            raise exceptions.FailedUploadError(
                'an error occurred while uploading', uploaded)

        # The (possibly manifest-augmented) condition is checked against the
        # destinations of the completed uploads.
        utils.check_condition(condition,
                              [r['dest'] for r in uploaded['completed']])
        return uploaded
Beispiel #2
0
Datei: s3.py Projekt: ying-w/stor
    def download(self, dest, condition=None, use_manifest=False, **kwargs):
        """Downloads a directory from S3 to a destination directory.

        Args:
            dest (str): The destination path to download file to. If downloading to a directory,
                there must be a trailing slash. The directory will be created if it doesn't exist.
            condition (function(results) -> bool): The method will only return
                when the results of download matches the condition. The condition
                is checked against the ``'source'`` value of every completed result.
            use_manifest (bool): When True, the data manifest contents are read
                for this path via ``utils.get_data_manifest_contents`` and a
                manifest-validation condition is combined with ``condition``.
            **kwargs: Accepted for interface compatibility; not used by this method.

        Returns:
            dict: A dictionary with the keys ``'completed'`` and ``'failed'``, each
            holding a list of the result objects returned by
            ``_download_object_worker``. Because a non-empty ``'failed'`` list
            raises, ``'failed'`` is always empty when this method returns normally.

        Raises:
            exceptions.FailedDownloadError: If any download result reports
                ``result['success']`` as falsy. The ``downloaded`` dictionary
                is passed as the second exception argument.

        Notes:
        - The destination directory will be created automatically if it doesn't exist.
        - This method downloads to paths relative to the current directory.
        """
        utils.validate_condition(condition)

        if use_manifest:
            # Build a condition that validates the results against the
            # manifest and combine it with the caller's condition, if any.
            object_names = utils.get_data_manifest_contents(self)
            manifest_cond = partial(utils.validate_manifest_list, object_names)
            condition = (utils.join_conditions(condition, manifest_cond)
                         if condition else manifest_cond)

        # This path is treated as a directory: list everything under it with
        # a trailing slash applied.
        source = utils.with_trailing_slash(self)
        # Every work item carries the same `dest`.
        # NOTE(review): the per-object local path is presumably derived inside
        # _download_object_worker - confirm.
        files_to_download = [{
            'source': file,
            'dest': dest
        } for file in source.list()]

        # The same segment size is used as both the multipart threshold and
        # the multipart chunk size.
        # NOTE(review): the key names look like boto3 TransferConfig
        # parameters - confirm against _download_object_worker.
        options = settings.get()['s3:download']
        segment_size = utils.str_to_bytes(options.get('segment_size'))
        transfer_config = {
            'multipart_threshold': segment_size,
            'max_concurrency': options.get('segment_threads'),
            'multipart_chunksize': segment_size
        }
        download_w_config = partial(self._download_object_worker,
                                    config=transfer_config)

        downloaded = {'completed': [], 'failed': []}
        with S3DownloadLogger(len(files_to_download)) as dl:
            pool = ThreadPool(options['object_threads'])
            try:
                result_iter = pool.imap_unordered(download_w_config,
                                                  files_to_download)
                # Drain results in completion order until the iterator is
                # exhausted.
                # NOTE(review): next() is called with an explicit timeout,
                # presumably so the main thread stays interruptible while
                # waiting - confirm.
                while True:
                    try:
                        result = result_iter.next(0xFFFF)
                        if result['success']:
                            # Only successful results are reported to the logger.
                            dl.add_result(result)
                            downloaded['completed'].append(result)
                        else:
                            downloaded['failed'].append(result)
                    except StopIteration:
                        break
                pool.close()
            except BaseException:
                # Stop the workers on any error (BaseException also covers
                # KeyboardInterrupt), then re-raise to the caller.
                pool.terminate()
                raise
            finally:
                # Always wait for the worker threads to finish.
                pool.join()

        if downloaded['failed']:
            raise exceptions.FailedDownloadError(
                'an error occurred while downloading', downloaded)

        # The (possibly manifest-augmented) condition is checked against the
        # sources of the completed downloads.
        utils.check_condition(condition,
                              [r['source'] for r in downloaded['completed']])
        return downloaded
Beispiel #3
0
 def test_str_to_bytes_invalid_units(self):
     """Check that utils.str_to_bytes raises ValueError for the input '10L'."""
     self.assertRaises(ValueError, utils.str_to_bytes, '10L')
Beispiel #4
0
 def test_str_to_bytes_invalid_str_long(self):
     """Check that utils.str_to_bytes raises ValueError for the input 'wrongM'."""
     malformed = 'wrongM'
     with self.assertRaises(ValueError):
         utils.str_to_bytes(malformed)
Beispiel #5
0
 def test_str_to_bytes_invalid_str_short(self):
     """Check that utils.str_to_bytes raises ValueError for the input 'M'."""
     expected_error = ValueError
     units_only = 'M'
     self.assertRaises(expected_error, utils.str_to_bytes, units_only)
Beispiel #6
0
 def test_str_to_bytes_int(self):
     """Check that utils.str_to_bytes returns 5 when given the integer 5."""
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling and behaves identically.
     self.assertEqual(5, utils.str_to_bytes(5))